def split_dataset(dataset, labelname='label', test_ratio=0.3):
    """
    Split the input dataset into a training set and a test set.
    :param dataset: (N,M) DataFrame
    :param labelname: String, name of the label column in dataset
    :param test_ratio: fraction of samples used for the test set
    :return: training data, training labels, test data, test labels
    """
    logging('Dataset: ', dataset.shape)
    # Move the label column to the last position to simplify later slicing.
    labels = dataset.pop(labelname)
    dataset.insert(dataset.shape[1], labelname, labels)
    pos_data = shuffle(dataset[dataset[labelname] == 1])
    neg_data = shuffle(dataset[dataset[labelname] == 0])
    cut = int(pos_data.shape[0] * test_ratio)
    # Keep the same positive/negative ratio in the training set as in the raw data.
    n_neg_train = int(neg_data.shape[0] / pos_data.shape[0] * pos_data.iloc[cut:].shape[0])
    test_data = shuffle(pd.concat((pos_data.iloc[:cut], neg_data.iloc[:cut])))
    train_data = shuffle(pd.concat((pos_data.iloc[cut:], neg_data.iloc[-n_neg_train:])))
    # test_data.to_csv(fn_raw_test, index=False)
    # train_data.to_csv(fn_raw_train, index=False)
    logging('Train set: ', train_data.shape, 'Test set: ', test_data.shape)
    return train_data.iloc[:, :-1], train_data.iloc[:, -1], test_data.iloc[:, :-1], test_data.iloc[:, -1]
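# A minimal usage sketch for split_dataset (not from the original source): it assumes a
# pandas DataFrame with a binary 'label' column and that pd, shuffle, and logging are the
# same module-level helpers used by the function above.
import numpy as np
import pandas as pd

df = pd.DataFrame({
    'f1': np.random.rand(100),
    'f2': np.random.rand(100),
    'label': np.random.randint(0, 2, 100),
})
X_train, y_train, X_test, y_test = split_dataset(df, labelname='label', test_ratio=0.3)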
def check_config(filename):
    """Check configuration file of backup2swift

    Argument:
        filename: config file path (default is ~/.bu2sw.conf)
    """
    try:
        conf = configparser.SafeConfigParser(allow_no_value=False)
    except TypeError as error:
        msg = "__init__() got an unexpected keyword argument 'allow_no_value'"
        if str(error) == msg:
            # for configparser on Python 2.6
            conf = configparser.SafeConfigParser()
        else:
            utils.logging(3, error)
    conf.read(filename)
    try:
        auth_url = conf.get('swift', 'auth_url')
        username = conf.get('swift', 'username')
        password = conf.get('swift', 'password')
        rotate_limit = conf.get('backup', 'rotate_limit')
    except (configparser.NoSectionError, configparser.NoOptionError) as error:
        # syslog.ERR is 3
        utils.logging(3, error)
    try:
        if conf.get('swift', 'ignore_verify_ssl_certification') == 'True':
            verify = False
        else:
            verify = True
    except (configparser.NoSectionError, configparser.NoOptionError):
        verify = True
    return auth_url, username, password, rotate_limit, verify
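# A minimal sketch of the config layout check_config() expects. The section and option
# names come from the function above; the values and the temp path are placeholder
# assumptions, not from the original source.
SAMPLE_CONF = """
[swift]
auth_url = https://swift.example.com/auth/v1.0
username = backup_user
password = secret
ignore_verify_ssl_certification = False

[backup]
rotate_limit = 10
"""

with open('/tmp/bu2sw.conf', 'w') as fp:
    fp.write(SAMPLE_CONF)
auth_url, username, password, rotate_limit, verify = check_config('/tmp/bu2sw.conf')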
def experiment(FLAGS, perfmeasure, deepmethod, setting_no):
    # 5-fold cross validation + test
    # Input
    #   XD: [drugs, features] sized array (features may also be similarities with other drugs)
    #   XT: [targets, features] sized array (features may also be similarities with other targets)
    #   Y: interaction values, can be real values or binary (+1, -1); insert float("nan") for unknown entries
    #   perfmeasure: function that takes as input a list of correct and predicted outputs, and returns performance;
    #                higher values should be better, so if using error measures use instead e.g. the inverse -error(Y, P)
    #   foldcount: number of cross-validation folds for settings 1-3; setting 4 always runs 3x3 cross-validation

    dataset = DataSet(fpath=FLAGS.dataset_path,      # TODO: update this in args as well
                      setting_no=FLAGS.setting_no,   # TODO: add this to args
                      seqlen=FLAGS.max_seq_len,
                      smilen=FLAGS.max_smi_len,
                      need_shuffle=False)
    # set character set size
    FLAGS.charseqset_size = dataset.charseqset_size
    FLAGS.charsmiset_size = dataset.charsmiset_size

    data_6_folds = dataset.read_dataset(fpath=FLAGS.dataset_path, setting_no=setting_no)

    if not os.path.exists(FLAGS.fig_dir):
        os.makedirs(FLAGS.fig_dir)
    print(FLAGS.log_dir)

    S1_avgperf, S1_avgloss, S1_teststd = nfold_1_2_3_setting_sample(
        data_6_folds, perfmeasure, deepmethod, FLAGS, dataset)

    logging("Setting Negative Sample" + str(FLAGS.setting_no), FLAGS)
    logging("avg_perf = %.5f, avg_mse = %.5f, std = %.5f" %
            (S1_avgperf, S1_avgloss, S1_teststd), FLAGS)
def _segment(self, features, labels):
    """
    Partition the value range of a feature into segments.
    :param features: (N,M)
    :param labels: (N,1)
    :return: sorted intervals; self.seg_ents holds the interval-entropy dict (key -- interval, value -- entropy)
    """
    assert isinstance(features, pd.core.series.Series), 'Features is not the instance of Series.'
    assert isinstance(labels, pd.core.series.Series), 'Labels is not the instance of Series.'
    assert features.shape[0] == labels.shape[0], 'The dimensions of features and label are unequal.'
    self.nfeature = features.shape[0]
    # The minimum number of samples per interval is at least self.examplelimit of the total.
    self.mincount = features.shape[0] * self.examplelimit
    self.pos_data = features[labels == 1].sort_values()
    self.neg_data = features[labels == 0].sort_values()
    sum_entropy = entropy(self.pos_data, self.neg_data)
    min_entropy = sum_entropy * self.entropylimit
    # Interval-entropy dictionary: key -- interval, value -- entropy.
    self.seg_ents = {(features.min(), features.max() + 1): sum_entropy}
    while len(self.seg_ents) < self.nbin:
        maxent = max(self.seg_ents.items(), key=lambda x: x[1])
        if maxent[1] < min_entropy:
            break
        interval = maxent[0]
        # Keep splitting the interval with the largest entropy.
        pos_parts = self.pos_data[logical_and(self.pos_data >= interval[0], self.pos_data < interval[1])]
        neg_parts = self.neg_data[logical_and(self.neg_data >= interval[0], self.neg_data < interval[1])]
        # If either class has at most one distinct feature value in this interval,
        # there is no point in splitting it further.
        if pos_parts.unique().shape[0] <= 1 or neg_parts.unique().shape[0] <= 1:
            self.seg_ents[interval] -= 1.0
        else:
            self._split_intervals(pos_parts, neg_parts, interval)
    logging(sorted(self.seg_ents.items(), key=lambda d: d[0]))
    return sorted(self.seg_ents.keys())
def test():
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i
        return 50

    model.eval()
    num_classes = model.num_classes
    total = 0.0
    proposals = 0.0
    correct = 0.0
    device = torch.device("cuda" if use_cuda else "cpu")

    if model.net_name() == 'region':  # region_layer
        shape = (0, 0)
    else:
        shape = (model.width, model.height)

    for data, target, org_w, org_h in test_loader:
        print("======")
        data = data.to(device)
        output = model(data)
        all_boxes = get_all_boxes(output, shape, conf_thresh, num_classes, use_cuda=use_cuda)

        for k in range(len(all_boxes)):
            boxes = all_boxes[k]
            correct_yolo_boxes(boxes, org_w[k], org_h[k], model.width, model.height)
            boxes = np.array(nms(boxes, nms_thresh))
            truths = target[k].view(-1, 5)
            num_gts = truths_length(truths)
            total = total + num_gts
            num_pred = len(boxes)
            if num_pred == 0:
                continue

            proposals += int((boxes[:, 4] > conf_thresh).sum())
            for i in range(num_gts):
                gt_boxes = torch.FloatTensor([
                    truths[i][1], truths[i][2], truths[i][3], truths[i][4],
                    1.0, 1.0, truths[i][0]
                ])
                gt_boxes = gt_boxes.repeat(num_pred, 1).t()
                pred_boxes = torch.FloatTensor(boxes).t()
                # pred_boxes and gt_boxes are transposed for torch.max
                best_iou, best_j = torch.max(multi_bbox_ious(gt_boxes, pred_boxes, x1y1x2y2=False), 0)
                if best_iou > iou_thresh and pred_boxes[6][best_j] == gt_boxes[6][0]:
                    correct += 1

    precision = 1.0 * correct / (proposals + eps)
    recall = 1.0 * correct / (total + eps)
    fscore = 2.0 * precision * recall / (precision + recall + eps)
    logging("correct: %d, precision: %f, recall: %f, fscore: %f" % (correct, precision, recall, fscore))
def main():
    try:
        args = parse_options()
        args.func(args)
    except RuntimeError as error:
        # syslog.ERR is 3
        utils.logging(3, error)
def main():
    # Validation parameters
    conf_thresh = FLAGS.conf_threshold
    nms_thresh = FLAGS.nms_threshold
    iou_thresh = FLAGS.iou_threshold

    # output file
    out_path = FLAGS.out_path

    # Training settings
    datacfg = FLAGS.data
    cfgfile = FLAGS.config

    data_options = read_data_cfg(datacfg)
    file_list = data_options['valid']
    gpus = data_options['gpus']  # e.g. 0,1,2,3
    ngpus = len(gpus.split(','))
    num_workers = int(data_options['num_workers'])
    # for testing, batch_size is set to 1 (one)
    batch_size = FLAGS.batch_size

    global use_cuda
    use_cuda = torch.cuda.is_available() and use_cuda

    ###############
    torch.manual_seed(seed)
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)

    global model
    model = Darknet(cfgfile)
    # model.print_network()

    init_width = model.width
    init_height = model.height

    kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
    val_loader = torch.utils.data.DataLoader(
        dataset.listDataset(file_list,
                            shape=(init_width, init_height),
                            shuffle=False,
                            jitter=False,
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                            ]),
                            validate=True),
        batch_size=batch_size,
        shuffle=False,
        **kwargs)

    if use_cuda:
        if ngpus > 1:
            model = torch.nn.DataParallel(model)
            model = model.module
    model = model.to(torch.device("cuda" if use_cuda else "cpu"))

    for w in FLAGS.weights:
        # model.load_weights(w)
        checkpoint = torch.load(w)
        model.load_state_dict(checkpoint['model_state_dict'])
        logging('evaluating ... %s' % (w))
        test(val_loader, conf_thresh, nms_thresh, iou_thresh, out_path, batch_size)
def askParameter(request, answer, paramName):
    '''ask parameter, check correctness and eventually log message'''
    ch = False
    while not ch:
        par = raw_input(request)
        ch = checkInput[paramName](par)
    logging(answer + " " + par)
    return par
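# Hypothetical illustration (not from the original source): askParameter expects a
# module-level dict `checkInput` that maps each parameter name to a validator returning
# True/False, roughly like this sketch.
example_checkInput = {
    'port': lambda s: s.isdigit() and 0 < int(s) < 65536,
    'host': lambda s: len(s.strip()) > 0,
}
# e.g. checkInput = example_checkInput
#      port = askParameter("Target port: ", "Port set to", 'port')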
def call_bowtie(input_ref, out_pref, log):
    query = ["bowtie2-build", "-f", input_ref, out_pref]
    utils.logging("[INFO] bowtie2-build is called as: %s" % (" ".join(query)), args)

    # make index with transformed genome
    f = open(log, 'w')
    p = subprocess.Popen(query, stdout=f)
    p.communicate()
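# A minimal usage sketch (the paths are placeholder assumptions, not from the original
# source): build one bowtie2 index for a transformed reference file, sending the
# bowtie2-build output to a log file next to the index prefix.
def example_build_one_index():
    call_bowtie(input_ref="/tmp/ref_W_C2T.fa",
                out_pref="/tmp/index/W_C2T",
                log="/tmp/index/W_C2T.build.log")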
def main():
    # Training settings
    datacfg = FLAGS.data
    cfgfile = FLAGS.config

    data_options = read_data_cfg(datacfg)
    testlist = data_options['valid']
    gpus = data_options['gpus']  # e.g. 0,1,2,3
    ngpus = len(gpus.split(','))
    num_workers = int(data_options['num_workers'])
    # for testing, batch_size is set to 1 (one)
    batch_size = 1  # int(net_options['batch'])

    global use_cuda
    use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda)

    ###############
    torch.manual_seed(seed)
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)

    global model
    model = Darknet(cfgfile)
    # model.print_network()

    init_width = model.width
    init_height = model.height

    kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
    global test_loader
    test_loader = torch.utils.data.DataLoader(
        dataset.listDataset(testlist,
                            shape=(init_width, init_height),
                            shuffle=False,
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                            ]),
                            train=False),
        batch_size=batch_size,
        shuffle=False,
        **kwargs)

    if use_cuda:
        if ngpus > 1:
            model = torch.nn.DataParallel(model)
            model = model.module
    model = model.to(torch.device("cuda" if use_cuda else "cpu"))

    for w in FLAGS.weights:
        model.load_weights(w)
        logging('evaluating ... %s' % (w))
        test()
def readData(config):
    file_path = config['raw_data']
    encoding = config['raw_encoding']
    with open(file_path, 'r', encoding=encoding) as f:
        raw_data = csv.reader(f, delimiter='\t')
        raw_data = list(raw_data)
    logging('Raw data sample', raw_data[0:3])
    return raw_data
def _post(self, url, data={}, headers=None, referer=u''):
    logging(u'<POST> "{}"'.format(url))
    if headers:
        self.session.headers.update(headers)
    if referer:
        self.session.headers.update({u'Referer': referer})
    res = self.session.post(url, data=data)
    logging(u'<POST> "{}" - {}'.format(url, res.status_code))
    return res
def _get(self, url, params={}, headers=None, referer=u''):
    logging(u'<GET> "{}"'.format(url))
    if headers:
        self.session.headers.update(headers)
    if referer:
        self.session.headers.update({u'Referer': referer})
    res = self.session.get(url, params=params)
    logging(u'<GET> "{}" - {}'.format(url, res.status_code))
    return res
def __init__(self):
    logging(u'Starting new manta session . . .')
    self.session = requests.Session()
    self.session.proxies = cfg.proxies
    self.session.headers = cfg.base_headers
    self._getMyIp()
    self._testSessionProxy()
    logging(u'Initializing request session . . .')
    self._buildCookiedSession(cfg.manta_base_url)
def cleanData(raw_data):
    sms_text = []
    sms_label = []
    for line in raw_data:
        sms_text.append(" ".join(preprocess(line[1])))
        sms_label.append(line[0])
    logging('Preprocessed text sample', sms_text[0:3])
    logging('Preprocessed label sample', sms_label[0:3])
    return sms_text, sms_label
def align(sc, args):
    import utils as g_utils
    import align_utils as a_utils

    ## broadcast raw reference
    ref_file = os.path.join(args.tempbase, "ref.fa")
    g_utils.read_hdfs(os.path.join(args.ref, "raw.fa"), ref_file)
    ref_dict = {}
    for chrid, seq in g_utils.read_fasta(ref_file):
        ref_dict[chrid] = (seq, len(seq))
    g_utils.logging("[DEBUG] loading reference done", args)
    bc_refdict = sc.broadcast(ref_dict)

    ## read from hadoop
    readRDD = sc.textFile(args.input) \
                .map(lambda x: g_utils.line2kv(x))
    if args.testmode == "balancing":
        readRDD = readRDD.partitionBy(args.nodes)
    readRDD = readRDD.cache()

    ## transform and get result of bowtie
    c2tTransRDD = readRDD.mapValues(lambda x: (x[0].translate(g_utils.make_trans_with("W", "C", "T")), x[1]))
    c2tMapRDD = c2tTransRDD.mapPartitionsWithIndex(
        lambda i, ptn: a_utils.mapping(i, "C2T", ["W_C2T", "C_C2T"], ptn, args))

    g2aTransRDD = readRDD.mapValues(lambda x: (x[0].translate(g_utils.make_trans_with("W", "G", "A")), x[1]))
    g2aMapRDD = g2aTransRDD.mapPartitionsWithIndex(
        lambda i, ptn: a_utils.mapping(i, "G2A", ["W_G2A", "C_G2A"], ptn, args))

    mergedRDD = sc.union([readRDD, c2tMapRDD, g2aMapRDD])
    combRDD = mergedRDD.combineByKey(lambda v: [v],
                                     lambda lst, v: lst + [v],
                                     lambda l1, l2: l1 + l2)

    filteredRDD = combRDD.mapValues(lambda x: a_utils.select_and_find_uniq_alignment(x)) \
                         .filter(lambda (k, v): v is not None)
    # .filter( lambda (k, v): not (v is None))
    if args.testmode == "balancing":
        filteredRDD = filteredRDD.partitionBy(args.nodes)

    methylRDD = filteredRDD.map(lambda x: a_utils.calc_methyl(x, bc_refdict.value, args.num_mm)) \
                           .filter(lambda x: x is not None)

    result_path = os.path.join(args.output, "alignment")
    methylRDD.map(lambda x: a_utils.res_to_string(x)).saveAsTextFile(result_path)
    return result_path
def _validate_domains(self, url):
    parsed = urlparse(url)
    domain = u'.'.join(parsed.netloc.split(u'.')[-2:])
    if any(map(lambda ad: domain.lower() == ad.lower(), self.allowed_domains)):
        return True
    else:
        logging(u'"{}" is not an allowed domain.'.format(domain))
        return False
def test():
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i
        return 50

    model.eval()
    num_classes = model.num_classes
    print("num", num_classes)
    total = 0.0
    proposals = 0.0
    correct = 0.0
    device = torch.device("cuda" if use_cuda else "cpu")

    for _, (data, target) in enumerate(test_loader):
        data = data.to(device)
        output = model(data)
        all_boxes = get_all_boxes(output, conf_thresh, num_classes)

        for k in range(data.size(0)):
            boxes = all_boxes[k]
            boxes = np.array(nms(boxes, nms_thresh))
            truths = target[k].view(-1, 5)
            num_gts = truths_length(truths)
            total = total + num_gts
            num_pred = len(boxes)
            if num_pred == 0:
                continue

            proposals += int((boxes[:, 4] > conf_thresh).sum())
            for i in range(num_gts):
                gt_boxes = torch.FloatTensor([
                    truths[i][1], truths[i][2], truths[i][3], truths[i][4],
                    1.0, 1.0, truths[i][0]
                ])
                gt_boxes = gt_boxes.repeat(num_pred, 1).t()
                pred_boxes = torch.FloatTensor(boxes).t()
                # pred_boxes and gt_boxes are transposed for torch.max
                best_iou, best_j = torch.max(multi_bbox_ious(gt_boxes, pred_boxes, x1y1x2y2=False), 0)
                if best_iou > iou_thresh and pred_boxes[6][best_j] == gt_boxes[6][0]:
                    correct += 1

    precision = 1.0 * correct / (proposals + eps)
    recall = 1.0 * correct / (total + eps)
    fscore = 2.0 * precision * recall / (precision + recall + eps)
    logging("precision: %f, recall: %f, fscore: %f" % (precision, recall, fscore))
def build_index(args):
    i_file = args.input

    utils.logging("[INFO] Start downloading reference file.", args)
    tempbase = utils.gen_file()
    utils.mkdir(tempbase)
    reffile = os.path.join(tempbase, "raw.fa")
    utils.read_hdfs(i_file, reffile)

    tempfiles = [open(os.path.join(tempbase, "%s.fa" % m), 'w') for m in conv_way]

    utils.logging("[INFO] Start transforming reference file.", args)
    # read ref
    for chrid, seq in utils.read_fasta(reffile):
        for i, method in enumerate(conv_way):
            (strand, a_from, a_to) = (method[0], method[2], method[4])
            if strand == "W":
                tempfiles[i].write(">%s\n%s\n" % (chrid, seq.translate(utils.make_trans_with(strand, a_from, a_to))))
            else:
                tempfiles[i].write(">%s\n%s\n" % (chrid, seq.translate(utils.make_trans_with(strand, a_from, a_to))[::-1]))

    # close all files
    for i, method in enumerate(conv_way):
        tempfiles[i].close()

    utils.logging("[INFO] Start launching bowtie2-build.", args)
    # run bowtie jobs
    procs = []
    utils.mkdir(os.path.join(tempbase, "index"))
    for i, method in enumerate(conv_way):
        out_pref = os.path.join(tempbase, "index", method)
        build_log = out_pref + ".build.log"
        proc = Process(target=call_bowtie, args=(tempfiles[i].name, out_pref, build_log,))
        procs.append(proc)
        proc.start()
    for proc in procs:
        proc.join()

    utils.logging("[INFO] Start uploading index file.", args)
    # move to hdfs
    utils.copy_to_hdfs(tempbase, args.output, remove_original=True)
def train_epoch(epoch, train_loader, config, writer=None):
    global processed_batches
    t0 = time.time()
    logging('epoch %d, processed %d samples, lr %f' %
            (epoch, epoch * len(train_loader.dataset), config.learning_rate))
    model.train()
    processed_batches = 0
    correct, total = 0, 0
    f = open(config.logFile, 'a')
    for batch_idx, (X_batch, Y_batch, NX_batch) in enumerate(train_loader):
        processed_batches = processed_batches + 1
        X_batch, Y_batch, NX_batch = (X_batch.cuda().squeeze(0),
                                      Y_batch.cuda().squeeze(0),
                                      NX_batch.cuda().squeeze(0))
        optimizer.zero_grad()
        output = model.forward(X_batch, NX_batch)
        # if len(output.shape) == 3:
        #     output = output.reshape(config.batch_size * config.num_nodes, -1)
        #     Y_batch = Y_batch.reshape(config.batch_size * config.num_nodes)
        loss = nn.CrossEntropyLoss()(output, Y_batch)
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred.eq(Y_batch))
        total += output.shape[0]
        acc = np.array(correct.cpu()) / total
        print('epoch: %d, processed_batches: %d, loss: %f' % (epoch, processed_batches, loss.item()))
        print('acc:', acc)
        f.write('%0.6f' % (loss.item()) + ' ' + '%0.6f' % (acc.item()) + '\n')
        loss.backward()
        optimizer.step()
    t1 = time.time()
    logging('training with %f samples/s' % (len(train_loader.dataset) / (t1 - t0)))
    f.close()
    if (epoch + 1) % config.save_interval == 0:
        torch.save({
            'epoch': epoch,
            'seen': processed_batches,
            'state_dict': model.state_dict()
        }, '%s/%06d.pkl' % ('backup', np.int(epoch / 12)))
    print("done")
class Session(object):
    _ip = None

    def __init__(self):
        logging(u'Starting new manta session . . .')
        self.session = requests.Session()
        self.session.proxies = cfg.proxies
        self.session.headers = cfg.base_headers
        self._getMyIp()
        self._testSessionProxy()
        logging(u'Initializing request session . . .')
        self._buildCookiedSession(cfg.manta_base_url)

    def _getMyIp(self):
        self._ip = json.load(urlopen(cfg.ip_url))[u'origin']
        logging(u'Current Public IP: {}'.format(self._ip))

    def _testSessionProxy(self):
        logging(u'Testing proxies . . .')
        try:
            r = self.session.get(cfg.ip_url, timeout=5)
            r.raise_for_status()
            r_json = json.loads(r.content)
            if self._ip in r_json[u'origin']:
                raise ValueError(u'Proxied IP same as original IP')
        except Exception, e:
            logging(u'Proxy test failed: {}'.format(unicode(e)))
            logging(u'Exiting process . . .')
            exit()
        logging(u'Proxy test passed! Current proxied IP: {}'.format(r_json[u'origin']))
def train(model, network_input, network_output, X_train, X_test, y_train, y_test, results_dir):
    callbacks_list = utils.model_callbacks(results_dir)
    utils.logging('Loaded model callbacks')

    utils.save_model_to_json(model, results_dir)
    utils.logging('Model saved to file: {}/{}'.format(results_dir, 'model.json'))

    history = model.fit(network_input,
                        network_output,
                        validation_data=(X_test, y_test),
                        validation_split=0.33,
                        epochs=200,
                        batch_size=64,
                        callbacks=callbacks_list,
                        verbose=1)

    utils.generate_final_plots(history, results_dir)
def get_eval(lr=0.01, n_episodes=50, is_train=False, savefig=False):
    # mkdir
    print('qlearning_nn evaluating...')
    base_dir = './results/qlearning_nn'
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    log_file = os.path.join(base_dir, 'qlearning_nn.log')
    logger = logging(log_file)
    results_file = os.path.join(base_dir, 'qlearning_nn.csv')

    if os.path.exists(results_file) and not is_train and not savefig:
        results = pd.read_csv(results_file)
        results = results.sort_values(by=['noisy', 'problem_id'])
        return results
    else:
        if os.path.exists(results_file):
            os.remove(results_file)
        if os.path.exists(log_file):
            os.remove(log_file)

        pkl_file = os.path.join(base_dir, 'qlearning_nn_lr={}_episodes={}.pkl'.format(lr, n_episodes))
        if os.path.exists(pkl_file):
            q_learning_nn = pickle.load(open(pkl_file, 'rb'))
        else:
            q_learning_nn = train(lr=lr, n_episodes=n_episodes)

        # eval
        results = pd.DataFrame([], columns=[
            'problem_id', 'noisy', 'action', 'Total_rewards', 'avg_reward_per_action'
        ])
        for problem_id, noisy, env in get_env():
            states, rewards, actions = implement(env, q_learning_nn, 1, discount_factor=0.95)
            result = {
                'problem_id': problem_id,
                'noisy': noisy,
                'Total_rewards': sum(rewards),
                'avg_reward_per_action': sum(rewards) / len(actions)
            }
            results = results.append(pd.DataFrame(result, index=[0]), ignore_index=0)
            logger(' ' + str(result))
            logger(actions)
            if savefig:
                get_fig(states, rewards)
                pic_name = os.path.join(base_dir, 'problem_id={} noisy={}.jpg'.format(problem_id, noisy))
                plt.savefig(dpi=300, fname=pic_name)
                plt.close()
            env.close()

        results = results.sort_values(by=['noisy', 'problem_id'])
        results.to_csv(results_file, index=0)
        return results
def stats(self):
    delta = self._endTime - self._startTime
    logging(u'Crawling finished.')
    logging(unicode('This Run:\n'
                    '{space}[Start At]\t{startTime}\n'
                    '{space}[Finish At]\t{endTime}\n'
                    '{space}[Total Runtime]\t{seconds} Seconds\n'
                    '{space}[Success]\t{success}\n'
                    '{space}[Failed]\t{failed}\n'
                    '{space}[NG Domain]\t{ngad}').format(
                        space=u' ' * 26,
                        success=self._success,
                        failed=self._failed,
                        ngad=self._ngad,
                        startTime=self._startTime.strftime(u'%Y-%m-%d %H:%M:%S %Z'),
                        endTime=self._endTime.strftime(u'%Y-%m-%d %H:%M:%S %Z'),
                        seconds=delta.total_seconds()))
def crawl(ids):
    browser = open_browser()
    for i in tqdm(ids):
        href = df.stock_href[i]
        if not isinstance(href, str):
            continue
        file_id = href.replace('?', '').replace('=', '')
        if file_id in files:
            print(file_id, 'already downloaded \n')
            continue
        if 'quote' not in href:
            continue
        print(i, file_id)
        url = 'https://finance.yahoo.com' + href
        try:
            browser.get(url)
        except Exception as e:
            print(e)
            browser.quit()
            browser = open_browser()
            browser.get(url)
        # Retry while the browser shows a Chinese Chrome error page
        # ("unable to access" / "not connected" / "this page isn't working").
        while "无法访问" in browser.page_source or '未连接' in browser.page_source or '该网页无法正常运作' in browser.page_source:
            try:
                browser.quit()
                browser = open_browser()
                browser.get(url)
                e = 0
            except Exception as e:
                print(e)
                browser.quit()
                browser = open_browser()
                browser.get(url)
        # time.sleep(30 + random.randint(0, 10))
        html = browser.page_source.encode('utf-8')
        browser.execute_script("window.scrollTo(500,2000)")
        time.sleep(SLEEP + random.randint(0, 10))
        filename = PATH + file_id + '.html'
        save_page(html, filename)
        logging(PATH, filename, '200')
        time.sleep(SLEEP + random.randint(0, 10))
def _buildCookiedSession(self, base_url, referer=u''):
    parsed = urlparse(base_url)

    # First request
    logging(u'Build Cookies Request #1:')
    res = self._get(parsed.geturl(),
                    headers={
                        u'Accept': u'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                        u'Upgrade-Insecure-Requests': u'1',
                        u'Host': parsed.netloc,
                    },
                    referer=referer)
    self.lastReferer = res.url
    # print res.content + '\n'

    # Second request to get js content
    JS = re.search(r'src\=\"\/(ser\-.*\.js)\"', res.content).group(1)
    url_2 = ParseResult(parsed.scheme, parsed.netloc, JS, u'', u'', u'').geturl()
    time.sleep(random.random())
    logging(u'Build Cookies Request #2:')
    res = self._get(url_2, referer=self.lastReferer)
    # print res.content + '\n'

    # Third request to post js to get cookies
    PID = re.search(r'FingerprintWrapper\(\{path\:\"\/.*?\?(PID\=.*?)\"\,', res.content).group(1)
    AJAX = re.search(r'FingerprintWrapper.*?ajax_header\:\"(.*?)\"\,interval', res.content).group(1)
    url_3 = ParseResult(parsed.scheme, parsed.netloc, JS, '', PID, '').geturl()
    time.sleep(random.random())
    logging(u'Build Cookies Request #3:')
    res = self._post(url_3,
                     data={u'p': proof(cfg.p)},
                     headers={u'Accept': u'*/*', u'X-Distil-Ajax': AJAX},
                     referer=self.lastReferer)
    # print res.__dict__
    return res
def eval(args):
    if args.is_log:
        file_name = os.path.basename(__file__)
        output_path = logging(file_name, verbose=2)

    user_embs, movie_embs, movie_cate_sim, Tr, Te = load_data(args)
    hidden_dim, num_movies = movie_embs.shape

    nor_embs = movie_embs.T.copy()
    for i in range(num_movies):
        nor_embs[i, :] = nor_embs[i, :] / np.linalg.norm(nor_embs[i, :])
    sim_mat = np.dot(movie_embs, movie_embs.T)

    lamda_list = [0.1]
    # write data to excel
    # file = xlwt.Workbook(encoding='ascii')
    # table = file.add_sheet('cucb')
    row0 = list(range(0, args.num_bandit_iter, 1))

    test_users = list(Te.keys())
    num_test_users = len(test_users)

    for i in range(len(lamda_list)):
        test_precision = np.zeros(args.num_bandit_iter)
        test_recall = np.zeros(args.num_bandit_iter)
        test_div = np.zeros(args.num_bandit_iter)
        test_cate_div = np.zeros(args.num_bandit_iter)
        test_reward = np.zeros(args.num_bandit_iter)
        args.lam_da = lamda_list[i]
        t1 = time.clock()
        for user in test_users:
            prec, recall, div, cate_div, reward, s_inx0 = c2ucb(movie_embs.T, None, Te[user], args,
                                                                num=args.num_bandit_iter,
                                                                sim=sim_mat,
                                                                cate_sim=movie_cate_sim,
                                                                user_emb=None)
            print(user)
            print(prec)
            test_precision += prec
            test_recall += recall
            test_div += div
            test_cate_div += cate_div
            test_reward += reward

        test_precision = test_precision / num_test_users
        test_recall = test_recall / num_test_users
        test_div = test_div / num_test_users
        test_cate_div = test_cate_div / num_test_users
        test_reward = test_reward / num_test_users
        print("lambda:{0}\ntest_precision:{1}\ntest_recall:{2}\ntest_div:{3}\ntest_cate_div:{4}\ntest_reward:{5}".format(
            args.lam_da, test_precision, test_recall, test_div, test_cate_div, test_reward))
        print("time used:%s\n" % (time.clock() - t1))
def get_eval(is_train=False, savefig=False):
    print('deterministic evaluating...')
    # mkdir
    # evaluate
    pic_dir = './results/Deterministic'
    if not os.path.exists(pic_dir):
        os.makedirs(pic_dir)
    log_file = os.path.join(pic_dir, 'Deterministic.log')
    logger = logging(log_file)
    results_file = os.path.join(pic_dir, 'Deterministic_results.csv')

    if os.path.exists(results_file) and not is_train and not savefig:
        results = pd.read_csv(results_file)
        results = results.sort_values(by=['noisy', 'problem_id'])
        return results
    else:
        if os.path.exists(results_file):
            os.remove(results_file)
        if os.path.exists(log_file):
            os.remove(log_file)

        results = pd.DataFrame([], columns=[
            'problem_id', 'noisy', 'action', 'Total_rewards', 'avg_reward_per_action'
        ])
        for problem_id, noisy, env in get_env():
            for act in range(4):
                func = MyDeterministicPolicy(act)
                states, rewards, actions = exec_policy(env, func, verbose=False)
                result = {
                    'problem_id': problem_id,
                    'noisy': noisy,
                    'action': act,
                    'Total_rewards': sum(rewards),
                    'avg_reward_per_action': sum(rewards) / len(actions)
                }
                results = results.append(pd.DataFrame(result, index=[0]), ignore_index=0)
                logger(' ' + str(result))
                logger(actions)
                if savefig:
                    get_fig(states, rewards)
                    pic_name = os.path.join(pic_dir, 'problem_id={} noisy={} action={}.jpg'.format(
                        problem_id, noisy, str(act)))
                    plt.savefig(dpi=300, fname=pic_name)
                    plt.close()
            env.close()

        results = results.sort_values(by=['noisy', 'problem_id'])
        results.to_csv(results_file, index=0)
        return results
def train():
    config = Config()
    agent = Agent(config)

    Dev_loader = loader("data/" + config.dev_file + ".npz", batch_size=config.batch_size, Train=False)
    num_dev_batches = Dev_loader.max_batch

    train_losses = []
    dev_losses = []
    lowest_loss = 1e+10

    for i in range(config.n_epoch):
        train_loss = 0.0
        total_trained_batches = 0
        for j in range(num_training_volumes):
            Train_loader = loader("data/" + config.train_file + "_vol_" + str(j) + ".npz",
                                  batch_size=config.batch_size, Train=True)
            num_train_batches = Train_loader.max_batch
            total_trained_batches += num_train_batches
            for k in range(num_train_batches):
                train_dic = Train_loader.get_batch()
                train_loss += agent.run_train_step(train_dic)
        avg_train_loss = train_loss / total_trained_batches

        dev_loss = 0.0
        dev_preds = []
        dev_ys = []
        for k in range(num_dev_batches):
            dev_dic = Dev_loader.get_batch()
            dev_batch_out, dev_batch_loss = agent.test(dev_dic)
            dev_pred = out_to_predict(dev_batch_out, version=2)
            dev_y = dev_dic["labels"]
            dev_ys += list(dev_y)
            dev_preds += list(dev_pred)
            dev_loss += dev_batch_loss

        precision, recall, accuracy, f1 = evaluate(np.array(dev_preds), np.array(dev_ys))
        avg_dev_loss = dev_loss / num_dev_batches
        logging(i + 1, avg_train_loss, avg_dev_loss, precision, recall, accuracy, f1)

        if avg_dev_loss < lowest_loss:
            agent.save(i + 1)
            lowest_loss = avg_dev_loss

        Train_loader.reset_loader()
        Dev_loader.reset_loader()
def start(self):
    self._success, self._failed, self._ngad = 0, 0, 0
    self._startTime = now()
    if len(self.crawl_urls):
        logging(u'Total {} urls waiting to be crawled.'.format(len(self.crawl_urls)))
        try:
            for url in self.crawl_urls:
                if self._validate_domains(url):
                    time.sleep(self.delay)
                    response, html = self.request_proccess(url)
                    if response is not None:
                        self._success += 1
                        self.response_processor(response, html)
                    else:
                        self._failed += 1
                else:
                    self._ngad += 1
        except Exception, e:
            self._failed += 1
            logging(u'Crawling has been interrupted by exception.')
    self._endTime = now()
    self.stats()
def send_text():
    """Sends the txt message from data passed through POST."""
    if request.headers['Content-Type'] == 'application/json':
        # converts json to python dict
        data = request.json
        # get list of carriers from carriers.json
        config = load_config()
        carriers = load_carriers()

        # authenticate request
        if data['api_key'] == config['api_key']:
            if data['carrier'] in carriers:
                # prepare the message
                carrier_choice = data['carrier']
                carrier = carriers[carrier_choice]
                number = data['number']
                msg = data['msg']
                to = "{0}{1}".format(number, carrier)
                sender = config['from']

                # sends the actual message
                mail = smtplib.SMTP(config['smtp_address'])
                mail.starttls()
                mail.login(config['username'], config['password'])
                mail.sendmail(sender, to, msg)
                mail.quit()

                # prepare the json response
                log = "Message: '{0}' was successfully sent to '{1}'.".format(msg, to)
                logging(log)
                resp = {"response": log}
                response = Response(json.dumps(resp), status=200, mimetype='application/json')
                return response
            # if the carrier is not supported or found in the carriers list
            else:
                log = "Carrier not supported."
                # log to web2txt.log file
                logging(log)
                resp = {"response": log}
                response = Response(json.dumps(resp), status=404, mimetype='application/json')
                return response
    # if the content type is not json
    else:
        log = "Wrong request content-type. API only supports JSON."
        # log to web2txt.log file
        logging(log)
        resp = {"response": log}
        response = Response(json.dumps(resp), status=415, mimetype='application/json')
        return response
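# A hypothetical client call for send_text() (the endpoint URL, host, and values are
# placeholder assumptions, not from the original source; the JSON keys match what the
# view above reads: api_key, carrier, number, msg).
import requests

payload = {
    "api_key": "YOUR_API_KEY",
    "carrier": "verizon",
    "number": "5551234567",
    "msg": "Hello from web2txt",
}
r = requests.post("http://localhost:5000/send_text",
                  json=payload,
                  headers={"Content-Type": "application/json"})
print(r.status_code, r.json())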
def printPossibleOptions(self):
    '''log possible options'''
    logging("Options")
    logging("1-Set target host/IP (Current: " + str(self.victim) + ")")
    logging("2-Set web app port (Current: " + str(self.webPort) + ")")
    logging("3-Set URI Path (Current: " + str(self.uri) + ")")
    logging("4-Set HTTP Request Method (1-GET/2-POST, current: " + str(self.httpMethod) + ")")
    logging("5-Set my local Mongo/Shell IP (Current: " + str(self.myIP) + ")")
    logging("6-Set shell listener port (Current: " + str(self.myPort) + ")")
def close(self):
    self._connector.cursor().close()
    self._connector.close()
    logging("error", "DBM close!")