def main():
    """Crawl the configured site list, classify cached pages, save results."""
    # Read our list of sites.
    sites = np.genfromtxt("site_full.txt", dtype="str")

    bot = Crawler(bm.BaseModel(), SKIP)
    bot.downloadHTML(sites)
    results = bot.parseCache("cache")

    # Output raw results, then the detection counts.
    print(results)
    print(detected(results))

    # Map each cached-file result back to its site name.
    # NOTE(review): assumes cache entries are named "<site index>.html", so
    # stripping the last 5 chars yields the index — confirm against parseCache.
    names = [(sites[int(entry[0][:-5])], entry[1]) for entry in results]

    # Save results as a text file.
    np.savetxt("results.txt", names, fmt="%s")
def GET(self):
    """Render the commits page from the base model's data."""
    # Pull the full data set straight off a fresh BaseModel instance.
    model = basemodel.BaseModel()
    return render.commits(model.get_data())
def GET(self):
    """Render the commits page with every commit found by the Commit model."""
    # NOTE(review): an unused basemodel.BaseModel() instance was dropped here;
    # nothing read it, and its construction appears side-effect free — confirm
    # against the basemodel module.
    all_commits = commit.Commit().findAll()
    return render.commits(all_commits)
import json, basemodel
from pprint import pprint

# Load the shared data blob once at import time; the projects list lives at
# index 1 under the 'projects' key.
all_data = basemodel.BaseModel().get_data()
all_projects = all_data[1]['projects']


class Project:
    """Simple query interface over the module-level ``all_projects`` list."""

    def __init__(self):
        # Attribute placeholders mirroring a project record's fields.
        self.id = None
        self.title = None
        self.description = None

    def find_all(self):
        """Return every project."""
        return all_projects

    def find(self, key, value):
        """Return all projects whose ``key`` field equals ``value``."""
        return [project for project in all_projects if project[key] == value]

    def find_one(self, key, value):
        """Return the first project whose ``key`` field equals ``value``.

        Raises IndexError when no project matches (unchanged from the
        original behavior).
        """
        # Delegate to find() instead of duplicating the filter expression.
        return self.find(key, value)[0]


p = Project()
# torch.backends.cudnn.benchmark = True # torch.backends.cudnn.enabled = True parser = argparse.ArgumentParser(description='Process some integers.') parser.add_argument('--write_pred_file', default=None, type=str, help='prediction file') args = parser.parse_args() history_length = int(utils.config.get('train', 'historyLength')) input_data = utils.CoQADataset(utils.config.get('train', 'devFile'), history_length) test_loader = DataLoader(input_data[0:10], batch_size=1, shuffle=False) CUDA = torch.cuda.is_available() model = basemodel.BaseModel(384, 64, 30, use_gpu = eval(utils.config.get('train', 'useGPU'))) state_dict = torch.load('resources/model.pth') model.load_state_dict(state_dict) if CUDA and eval(utils.config.get('train', 'useGPU')): model = model.cuda() model.eval() start = time.time() batch_size = int(utils.config.get('train', 'batchSize')) with torch.no_grad(): for step, input_batch in enumerate(test_loader): gc.collect() print("Step " + str(step)) para_tokens = [x[0] for x in input_batch["para_tokens"]] batch_data = [] for q_a in input_batch["question_answer_list"]:
def train_model():
    """Train the CoQA BaseModel and periodically checkpoint it.

    All hyperparameters come from the 'train' section of ``utils.config``.
    Streams conversations one at a time from the DataLoader, flattens the
    question/answer histories, and optimizes a joint NLL loss over answer
    start position, end position, and answer type.
    """
    history_length = int(utils.config.get('train', 'historyLength'))
    input_data = utils.CoQADataset(utils.config.get('train', 'trainFile'),
                                   history_length)
    train_loader = DataLoader(input_data, batch_size=1, shuffle=True)
    # NOTE(review): eval() on config text is dangerous if the config file is
    # ever untrusted; a strict "true"/"false" parse would be safer. Hoisted to
    # one call so every GPU check below agrees.
    use_gpu = eval(utils.config.get('train', 'useGPU'))
    model = basemodel.BaseModel(384, 64, 30, use_gpu=use_gpu)
    CUDA = torch.cuda.is_available()
    if CUDA and use_gpu:
        model = model.cuda()
    model.train()
    start = time.time()
    batch_size = int(utils.config.get('train', 'batchSize'))
    lr = float(utils.config.get('train', 'learningRate'))
    beta1 = float(utils.config.get('train', 'beta1'))
    beta2 = float(utils.config.get('train', 'beta2'))
    l2_weight_decay = float(utils.config.get('train', 'l2WeightDecay'))
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(beta1, beta2),
                           weight_decay=l2_weight_decay)
    # optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    criterion = nn.NLLLoss()
    for step, input_batch in enumerate(train_loader):
        gc.collect()
        print("Step " + str(step))
        para_tokens = [x[0] for x in input_batch["para_tokens"]]
        batch_data = []
        for q_a in input_batch["question_answer_list"]:
            if q_a["invalid_flag"]:
                continue
            # Unwrap the extra nesting the DataLoader's default collation adds
            # around every token list.
            q_a["question"] = [
                q_a["question"][0],
                [x[0] for x in q_a["question"][1]]
            ]
            for i in range(len(q_a["history_questions"])):
                q_a["history_questions"][i] = [
                    q_a["history_questions"][i][0],
                    [x[0] for x in q_a["history_questions"][i][1]]
                ]
            for i in range(len(q_a["history_answers_input"])):
                q_a["history_answers_input"][i] = [
                    q_a["history_answers_input"][i][0],
                    [x[0] for x in q_a["history_answers_input"][i][1]]
                ]
            for i in range(len(q_a["history_answers_span"])):
                q_a["history_answers_span"][i] = [
                    q_a["history_answers_span"][i][0],
                    [x[0] for x in q_a["history_answers_span"][i][1]]
                ]
            q_a["para_tokens"] = para_tokens
            batch_data.append(copy.deepcopy(q_a))
        print("batch start - " + str(len(batch_data)))
        # i indexes mini-batches (not items); stop once the batch start
        # offset passes the end of the data.
        for i in range(0, len(batch_data)):
            if batch_size * i >= len(batch_data):
                break
            batch_input = batch_data[
                batch_size * i:min(batch_size * (i + 1), len(batch_data))]
            optimizer.zero_grad()
            (prob_start, prob_end, prob_ans,
             start_pos, end_pos, ans_type) = model.forward(batch_input)
            if use_gpu:
                start_pos = torch.tensor(start_pos).cuda()
                end_pos = torch.tensor(end_pos).cuda()
                ans_type = torch.tensor(ans_type).cuda()
            else:
                start_pos = torch.tensor(start_pos)
                end_pos = torch.tensor(end_pos)
                ans_type = torch.tensor(ans_type)
            log_prob_start = torch.log(prob_start)
            log_prob_end = torch.log(prob_end)
            log_prob_ans = torch.log(prob_ans)
            loss_start = criterion(log_prob_start, start_pos)
            loss_end = criterion(log_prob_end, end_pos)
            loss_ans = criterion(log_prob_ans, ans_type)
            overall_loss = loss_start + loss_end + loss_ans
            # BUG FIX: the original tested the raw config string
            # (`if utils.config.get('train', 'useGPU'):`), which is truthy for
            # ANY non-empty value — including "False" — so this CUDA cleanup
            # ran even on CPU configurations. Now gated on the parsed flag,
            # consistent with every other useGPU check.
            if use_gpu:
                del (start_pos, end_pos, ans_type, log_prob_start,
                     log_prob_end, log_prob_ans, loss_start, loss_end,
                     loss_ans)
                torch.cuda.empty_cache()
            overall_loss.backward()
            optimizer.step()
            del batch_input, prob_start, prob_end, prob_ans
            gc.collect()
            torch.cuda.empty_cache()
        print("batch end")
        # Periodic checkpoint every saveModelFreq steps (skipping step 0).
        if ((step > 0)
                and (step % int(utils.config.get('train', 'saveModelFreq')) == 0)
                and eval(utils.config.get('train', 'saveModel'))):
            torch.save(model.state_dict(),
                       utils.config.get('train', 'savePath'))
            print("Model saved after %d steps" % step)
    # Final checkpoint after the full pass over the training data.
    if eval(utils.config.get('train', 'saveModel')):
        torch.save(model.state_dict(), utils.config.get('train', 'savePath'))
    print("Time taken: " + str(time.time() - start))