def build_emoji_sentiment_dictionary():
    new_emoji_sentiment_filename = path + "/res/emoji/emoji_sentiment_dictionary.txt"
    if not os.path.exists(new_emoji_sentiment_filename):
        filename = path + "/res/emoji/emoji_sentiment_raw.txt"
        emojis = utils.load_file(filename)[1:]
        lines = []
        for line in emojis:
            line = line.split(",")
            emoji = line[0]
            occurences = line[2]
            negative = float(line[4]) / float(occurences)
            neutral = float(line[5]) / float(occurences)
            positive = float(line[6]) / float(occurences)
            description = line[7]
            lines.append(str(emoji) + "\t" + str(negative) + "\t" + str(neutral)
                         + "\t" + str(positive) + "\t" + description.lower())
        utils.save_file(lines, new_emoji_sentiment_filename)
    emoji_sentiment_data = utils.load_file(new_emoji_sentiment_filename)
    emoji_sentiment_dict = {}
    for line in emoji_sentiment_data:
        line = line.split("\t")
        # Get emoji characteristics as a list [negative, neutral, positive, description]
        emoji_sentiment_dict[line[0]] = [line[1], line[2], line[3], line[4]]
    return emoji_sentiment_dict
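# Usage sketch added for illustration; assumes the module-level `path` and `utils`
# dependencies used above are configured. Values in the returned dict are the
# tab-separated strings [negative, neutral, positive, description].
def _example_emoji_lookup():
    emoji_scores = build_emoji_sentiment_dictionary()
    negative, neutral, positive, description = emoji_scores.get(
        "\U0001F602", ["0", "0", "0", "unknown"])  # 😂, with a fallback entry
    print("positive share:", positive, "for", description)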
def get_judgment(self, relpath, judgment):
    relurl = None
    reobj = re.search('judgmentID=(?P<id>\d+)', judgment['link'])
    if not reobj:
        self.logger.warning(u'No judgment id in %s' % judgment['link'])
    else:
        judgmentId = reobj.groupdict()['id']
        relurl = os.path.join(relpath, judgmentId)
        filepath = os.path.join(self.rawdir, relurl)
        metapath = os.path.join(self.metadir, relurl)
        if not os.path.exists(filepath):
            pdfdoc = self.download_url(judgment['link'],
                                       loadcookies=self.cookiefile.name)
            if pdfdoc:
                utils.save_file(filepath, pdfdoc)
                self.logger.info(u'Saved %s' % relurl)
            else:
                self.logger.info(u'Did not download %s' % judgment['link'])
        if os.path.exists(filepath) and \
                (self.updateMeta or not os.path.exists(metapath)):
            utils.print_tag_file(metapath, judgment['metainfo'])
            self.logger.info(u'Saved metainfo %s' % relurl)
        if not os.path.exists(filepath):
            relurl = None
    return relurl
def new_finance(request, *args, **kwargs):
    if not request.session.get('is_login', False) or request.session['user_type'] != 'accounts':
        return redirect("/index")
    error_msg, page, page_list, projects_dict, project_id = initialize(args, request, True)
    if not project_id:
        return redirect('/account/summary')
    if request.method == 'GET':
        return render(request, 'user_financenew.html',
                      {"projects_dict": projects_dict, "project_id": project_id,
                       "page_list": page_list, "page": page, 'error_msg': error_msg})
    if request.method == 'POST':
        data = {
            "name": request.POST.get("name"),
            "process": request.POST.get("process"),
            "where": request.POST.get("where"),
            "money": request.POST.get("money"),
            "time": request.POST.get("time"),
            "consumer": request.POST.get("consumer"),
            "status": "审核中",  # "under review"
            "auditor": '',
        }
        if not request.FILES.get("file"):
            error_msg = '请上传发票证明'  # "Please upload the invoice as proof"
            return render(request, 'user_financenew.html',
                          {"projects_dict": projects_dict, "project_id": project_id,
                           "page_list": page_list, "page": page, 'error_msg': error_msg})
        else:
            with open("database/finance.pk", 'rb') as f:
                finance = pickle.load(f)
            if finance.get(project_id, None) is None:
                finance[project_id] = []
            finance[project_id].append(data)
            with open("database/finance.pk", 'wb') as f:
                f.write(pickle.dumps(finance))
            utils.save_file(project_id, request, save_name="record%d" % (len(finance[project_id])))
            return redirect("/account/finance&id=%d&page=%d" % (project_id, page))
def download_docs(self, docs, relpath, dateobj):
    downloaded = []
    for doc in docs:
        if not doc.has_key(self.CASENO) or not doc.has_key(self.HREF):
            self.logger.info(u'Ignoring %s' % doc)
            continue
        caseno = doc[self.CASENO]
        href = doc[self.HREF]
        tmprel = os.path.join(relpath, caseno)
        rawpath = os.path.join(self.rawdir, tmprel)
        metapath = os.path.join(self.metadir, tmprel)
        if not os.path.exists(rawpath):
            self.logger.info(u'Downloading %s from %s' % (caseno, href))
            webpage = self.download_url(doc[self.HREF])
            if webpage:
                utils.save_file(rawpath, webpage)
            else:
                self.logger.warning(u'Could not download %s' % href)
        if os.path.exists(rawpath) and not os.path.isdir(rawpath):
            if not os.path.exists(metapath) or self.updateMeta:
                self.logger.info(u'Saving metapath for %s' % caseno)
                self.save_meta_tags(metapath, doc, dateobj)
            downloaded.append(tmprel)
    return downloaded
def convert_csv_to_vectors(url, output_url, start=0, header=0, one_hots=None, one_hots_dims=0):
    data = utils.load_file(url, False)
    dims = len(data[0].split(",")) - start
    if one_hots:
        dims = dims - len(one_hots)
    outputs = []
    # NOTE: only the first 100 data rows are converted, as in the original code.
    for d in data[header:100]:
        row = d.rstrip("\n").split(",")
        if one_hots:
            v = [0.0] * dims
            j = 0
            for i_t, i_x in enumerate(row):
                if i_t >= start and i_t not in one_hots:
                    v[j] = float(i_x)
                    j += 1
            if len(one_hots) == 1:
                o_h = to_one_hot(int(row[one_hots[0]]), one_hots_dims)
                v += o_h
            else:
                for idx in one_hots:
                    o_h = to_one_hot(int(row[idx]), one_hots_dims)
                    v += o_h
            outputs.append(v)
        else:
            # Fixed: compare column indices (not string values) against `start`
            # and keep the converted row, which the original discarded.
            v = [float(x) for i, x in enumerate(row) if i >= start]
            outputs.append(v)
    name = url.split("/")[-1]
    name = name.split(".")[0]
    print("output:", np.shape(outputs))
    utils.save_file("%s/%s" % (output_url, name), outputs)
def get_judgment(self, url, relpath, metainfo):
    filename = utils.url_to_filename(url, False, ["yID", "nID", "ID"])
    if not filename:
        self.logger.warning(u"No filename for %s" % url)
        return
    rel = os.path.join(relpath, filename)
    filepath = os.path.join(self.rawdir, rel)
    if os.path.exists(filepath):
        self.logger.info(u"Already exists %s" % filepath)
    else:
        self.logger.info(u"Downloading %s" % url)
        webpage = self.download_url(url, loadcookies=self.cookiefile.name)
        if not webpage:
            self.logger.warning(u"Could not download %s" % url)
            return
        utils.save_file(filepath, webpage)
        self.logger.info(u"Saved %s" % filepath)
    if os.path.exists(filepath):
        metapath = os.path.join(self.metadir, rel)
        if metainfo and (self.updateMeta or not os.path.exists(metapath)):
            utils.print_tag_file(metapath, metainfo)
        return rel
def GET(self, page_name, id):
    access = utils.page_access(page_name, utils.PERM_WRITE)
    if access is not None:
        return access
    data = web.input(title="", description="")
    try:
        content = utils.fetch_file(page_name)
        try:
            obj = json.loads(content)
            if obj["components"].has_key(id):
                obj["components"][id]["title"] = data.title
                if obj["components"][id]["type"] == "note":
                    obj["components"][id]["description"] = data.description
                try:
                    utils.save_file(page_name, json.dumps(obj))
                    if obj["components"][id]["type"] == "note":
                        obj["components"][id]["description"] = textile.textile(obj["components"][id]["description"])
                    obj["components"][id]["id"] = id
                    return utils.callback(json.dumps(obj["components"][id]))
                except IOError:
                    utils.handle_error("failed to save file")
        except:
            utils.handle_error("failed to read file")
    except IOError:
        utils.handle_error("file not found")
def convert_vocab_to_text(vocabs):
    vocab_str = ""
    length = len(vocabs)
    i = 0
    vocab_idx = dict()
    vocab_lst = list()
    idx_file = '%s/%s' % (folder, 'vocabs_idx.pkl')
    if u.check_file(idx_file):
        vocab_idx = u.load_file(idx_file)
    else:
        for key, value in vocabs.iteritems():
            vocab_idx[value] = key
        u.save_file(idx_file, vocab_idx)
    lst_file = '%s/%s' % (folder, 'vocabs_list.pkl')
    if u.check_file(lst_file):
        vocab_lst = u.load_file(lst_file)
    else:
        for key in sorted(vocab_idx.iterkeys()):
            vocab_lst.append(vocab_idx[key])
        u.save_file(lst_file, vocab_lst)
    regex = RegexpTokenizer(r'\w+')
    for w in vocab_lst:
        words = regex.tokenize(w)
        if len(words) != 0:
            w_ = '_'.join(words)
            i += 1
            if i % 10000 == 0:
                print('Processed %i' % i)
                # break
            if i == length:
                vocab_str += '%s' % w_
            else:
                vocab_str += '%s\n' % w_
    return vocab_str
def replace(self, result):
    contents = utils.open_file(result.crawler_result.path)
    # Offset caused by replacing a string with another of a (possibly)
    # different length. URL character indices need to be adjusted by
    # this value.
    offset = 0
    for idx in range(result.num_urls):
        success = result.urls[idx].reached
        if success:
            url = result.crawler_result.urls[idx].url
            start_index = result.crawler_result.urls[idx].start_index
            end_index = result.crawler_result.urls[idx].end_index
            # Adjust indices for the offset
            start_index += offset
            end_index += offset
            # Update the offset
            offset += self._extra_chars
            # Insert in place of the original URL
            contents = contents[:start_index] + url + contents[end_index:]
    utils.save_file(result.crawler_result.path, contents)
def get_judgment(self, url, relpath, metainfo):
    filename = utils.url_to_filename(url, False, ['yID', 'nID', 'ID'])
    if not filename:
        self.logger.warning(u'No filename for %s' % url)
        return
    rel = os.path.join(relpath, filename)
    filepath = os.path.join(self.rawdir, rel)
    if os.path.exists(filepath):
        self.logger.info(u'Already exists %s' % filepath)
    else:
        self.logger.info(u'Downloading %s' % url)
        webpage = self.download_url(url, loadcookies=self.cookiefile.name)
        if not webpage:
            self.logger.warning(u'Could not download %s' % url)
            return
        utils.save_file(filepath, webpage)
        self.logger.info(u'Saved %s' % filepath)
    if os.path.exists(filepath):
        metapath = os.path.join(self.metadir, rel)
        if metainfo and (self.updateMeta or not os.path.exists(metapath)):
            utils.print_tag_file(metapath, metainfo)
        return rel
def create_wrapper_for_pages(page1_path, page2_path, output_filename):
    a = create_dom(page1_path)
    b = create_dom(page2_path)
    clean_dom(a)
    clean_dom(b)
    w = generate_wrapper(a, b)
    save_file(output_filename, w.stringify())
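# Usage sketch added for illustration; the file names below are placeholders,
# not paths from the original project.
if __name__ == "__main__":
    create_wrapper_for_pages("samples/page1.html", "samples/page2.html", "wrapper.out")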
def corpus_segment(corpus_path, seg_path):
    '''
    :param corpus_path: path of the unsegmented corpus
    :param seg_path: path where the segmented corpus is stored
    '''
    class_list = listdir_nohidden(corpus_path)
    for class_dir in class_list:
        class_path = corpus_path + "/" + class_dir + "/"
        seg_class_path = seg_path + "/" + class_dir + "/"
        if not os.path.exists(seg_class_path):
            os.makedirs(seg_class_path)
        file_list = listdir_nohidden(class_path)
        for file in file_list:
            full_path = class_path + file
            content = read_file(full_path)
            content_seg = jieba.cut(content)
            # Keyword extraction / tagging
            # print(file, jieba.analyse.extract_tags(content, topK=5, withWeight=False, allowPOS=('ns', 'n', 'vn', 'v')))
            # Save the segmented file to the segmented-corpus directory
            save_file(seg_class_path + file, bytes(" ".join(content_seg), encoding="utf8"))
    print("===================*****====================")
    print("corpus_segment end")
    print("===================*****====================")
def convert_transport_data(url):
    print("Converting: %s" % url)
    name = url.split("/")[-1]
    name = name.split(".")[0]
    data = utils.load_file(url, False)
    year_length = (int(data[-1].rstrip("\n").split(",")[-1]) + 1) * 24
    days = [[0] * 1024] * year_length
    old_h = -1
    old_d = -1
    one_hour = []
    for x in data[1:]:
        rows = x.rstrip("\n").split(",")
        d = int(rows[-1])
        h = int(rows[-2]) % 24
        idx = int(rows[1])
        if old_h != h:
            if old_h != -1:
                days[old_d * 24 + old_h] = one_hour
            one_hour = [0] * 1024
        if idx < 1024:
            one_hour[idx] = float(rows[2])
        old_h = h
        old_d = d
    days[old_d * 24 + old_h] = one_hour
    utils.save_file("vectors/transportation/%s.pkl" % name, days)
def pdb_to_lh5(traj, field):
    path = getattr(traj, field)
    data = load_file(path)
    new_fn = os.path.splitext(path)[0] + '.lh5'
    save_file(new_fn, data)
    os.unlink(path)
    setattr(traj, field, new_fn)
def get_judgment(self, relpath, postdata, href, metainfo):
    if not metainfo.has_key("title"):
        self.logger.warning(u"No title found for %s" % href)
        return None
    filename = self.get_filename(metainfo["title"])
    if not filename:
        self.logger.warning(u"No filename for %s" % href)
        return None
    relurl = os.path.join(relpath, filename)
    rawpath = os.path.join(self.rawdir, relurl)
    metapath = os.path.join(self.metadir, relurl)
    if not os.path.exists(rawpath):
        judgment = self.download_link(postdata, href)
        if judgment:
            mtype = utils.get_buffer_type(judgment)
            if re.match("text/html", mtype):
                self.logger.warning(u"Err in downloading %s: Directed to a default website" % relurl)
            else:
                self.logger.info(u"Downloaded %s" % relurl)
                utils.save_file(rawpath, judgment)
        else:
            self.logger.info(u"Could not download %s" % relurl)
    if os.path.exists(rawpath):
        if metainfo and (self.updateMeta or not os.path.exists(metapath)):
            tags = utils.obj_to_xml("document", metainfo)
            utils.save_file(metapath, tags)
        return relurl
    else:
        return None
def do_translation_part(filename1, filename2, option='translation', header=True, cols=None):
    """ Get centroids """
    if cols is None:
        centroid_interpolated = utils.get_data(filename1, cols=cols)
    else:
        centroid_interpolated = utils.get_data(
            filename1, cols=[cols[0], cols[1]])  # change cols if cx,cy,cz columns change
    centroid_translation = utils.get_data(filename2)
    """ Get interpolation """
    xy_interpolated = utils.intepolate(centroid_interpolated, centroid_translation)
    """ Translate back """
    centroid_translated_back = utils.translate(centroid_interpolated, xy_interpolated)
    """ Save translation file """
    if cols is None:
        utils.save_file(filename1, centroid_translated_back, option=option,
                        header=header, cols=cols)
    else:
        utils.save_file(filename1, centroid_translated_back, option=option,
                        header=header,
                        cols=[cols[0], cols[1]])  # change cols if cx,cy,cz columns change
    return None
def get_judgment(self, relpath, postdata, href, metainfo):
    if not metainfo.has_key('title'):
        self.logger.warning(u'No title found for %s' % href)
        return None
    filename = self.get_filename(metainfo['title'])
    if not filename:
        self.logger.warning(u'No filename for %s' % href)
        return None
    relurl = os.path.join(relpath, filename)
    rawpath = os.path.join(self.rawdir, relurl)
    metapath = os.path.join(self.metadir, relurl)
    if not os.path.exists(rawpath):
        judgment = self.download_link(postdata, href)
        if judgment:
            mtype = utils.get_buffer_type(judgment)
            if re.match('text/html', mtype):
                self.logger.warning(
                    u'Err in downloading %s: Directed to a default website' % relurl)
            else:
                self.logger.info(u'Downloaded %s' % relurl)
                utils.save_file(rawpath, judgment)
        else:
            self.logger.info(u'Could not download %s' % relurl)
    if os.path.exists(rawpath):
        if metainfo and (self.updateMeta or not os.path.exists(metapath)):
            tags = utils.obj_to_xml('document', metainfo)
            utils.save_file(metapath, tags)
        return relurl
    else:
        return None
def do_eigen_part(filename, option='eigen', header=True, cols=None):
    if cols is None:
        s_values = utils.get_data(filename, delim=',', header=header, cols=cols)
    else:
        s_values = utils.get_data(filename, delim=',', header=header, cols=[cols[0], cols[1]])
    cov_M = np.zeros((s_values.shape[0], 3, 3))
    s_M = np.zeros((s_values.shape[0], 3))
    v_M = np.zeros((s_values.shape[0], 3, 3))
    values = np.zeros((s_values.shape[0], 12))
    for i in range(len(cov_M)):
        cov_M[i] = utils.make_covariance(s_values[i])
        _, s_M[i], v_M[i] = utils.svd(cov_M[i])
        values[i, 0:9] = v_M[i].reshape(9)
        values[i, 9:] = s_M[i]
    utils.save_file(filename, values, option=option, header=header, cols=None)
    return None
def export_hash_helper():
    code = \
'''//////////////////////////////////////////////////////////////////////////
/// This is an auto-generated script, please do not modify it manually ///
//////////////////////////////////////////////////////////////////////////
using CE;
'''
    code += \
'''
public static class CEHashHelper
{
    public static ICELoader CreateLoaderFromHash(uint hash)
    {
        ICELoader loader = null;
        switch (hash)
        {
'''
    for sheet_name, sheet_item in all_sheet.iteritems():
        code += \
'''            case %s:
            {
                loader = new %s();
            }
            break;
''' % (utils.bkdr_hash(sheet_name, BKDR_SEED), sheet_name)
    code += \
'''        }
        return loader;
    }
}
'''
    utils.save_file(os.path.join(CSHARP_DIR, 'CEHashHelper.cs'), code)
def generate_vocabs(base_folder, all_vocab_folder, data):
    all_diffs_vocab_filename = os.path.join(all_vocab_folder, DIFFS_VOCAB_FILENAME)
    all_msgs_vocab_filename = os.path.join(all_vocab_folder, MSGS_VOCAB_FILENAME)
    for folder, diffs_filename, msgs_finame, diffs_at_least, msgs_at_least in data:
        diffs_word_freq = word_freq(
            os.path.join(base_folder, folder, diffs_filename),
            os.path.join(base_folder, folder, DIFFS_VOCAB_FILENAME),
        )
        new_diffs_vocab = generate_new_vocab(diffs_word_freq, all_diffs_vocab_filename, diffs_at_least)
        msgs_word_freq = word_freq(
            os.path.join(base_folder, folder, msgs_finame),
            os.path.join(base_folder, folder, MSGS_VOCAB_FILENAME),
        )
        new_msgs_vocab = generate_new_vocab(msgs_word_freq, all_msgs_vocab_filename, msgs_at_least)
        output_folder = os.path.join(base_folder, "vocabs", folder)
        if not os.path.isdir(output_folder):
            make_dirs(output_folder)
        save_file(os.path.join(output_folder, DIFFS_VOCAB_FILENAME), new_diffs_vocab)
        save_file(os.path.join(output_folder, MSGS_VOCAB_FILENAME), new_msgs_vocab)
def loadWordVectorsFromText(self, data_path="data"):
    self.initData()
    file_vectors = data_path + "/vectors.txt"
    file_mapping = data_path + "/vocabs.txt"
    if path.exists(file_vectors) and path.exists(file_mapping):
        with open(file_vectors, 'rb') as f:
            self.vectors = pickle.load(f)
        with open(file_mapping, 'rb') as f:
            self.vocabs = pickle.load(f)
    else:
        with open(self.src, 'rb') as f:
            word_vectors = f.readlines()
        vectors_length = len(word_vectors[0].split(' ')) - 1
        tmp = list()
        tmp.append(np.zeros((1, vectors_length), dtype=theano.config.floatX))
        for index, w_v in enumerate(word_vectors):
            els = w_v.split(' ')
            word = els[0]
            els[-1] = els[-1].replace('\n', '')
            vector = [[float(i) for i in els[1:]]]
            tmp.append(np.array(vector, dtype=theano.config.floatX))
            self.vocabs[word] = index
        self.vectors = np.array(tmp, dtype=theano.config.floatX)
        utils.save_file(file_vectors, self.vectors)
        utils.save_file(file_mapping, self.vocabs)
def main(args):
    # Load file content.
    content = load_file(args.input, encoding=args.encoding)
    # Clean content.
    cleaned = clean(content, args.pattern)
    # Save cleaned content.
    save_file(args.output, cleaned, encoding=args.encoding)
def save_current_mail_data(self):
    if SMTPParser.current_mail_data != '':
        fullpath = form_path(self.source_ip, self.destination_ip,
                             'smtp_data_%d' % SMTPParser.file_counter)
        save_file(fullpath, SMTPParser.current_mail_data)
        SMTPParser.current_mail_data = ''
        SMTPParser.file_counter += 1
def build_vocab(anno_file, threshold):
    """Build a simple vocabulary wrapper."""
    annos = load_file(anno_file)
    print('total QA pairs', len(annos))
    counter = Counter()
    for (qns, ans) in zip(annos['question'], annos['answer']):
        # qns, ans = vqa['question'], vqa['answer']
        text = str(qns) + ' ' + str(ans)
        tokens = nltk.tokenize.word_tokenize(text.lower())
        counter.update(tokens)
    counter = sorted(counter.items(), key=lambda item: item[1], reverse=True)
    save_file(dict(counter), 'dataset/VideoQA/word_count.json')
    # If the word frequency is less than 'threshold', then the word is discarded.
    words = [item[0] for item in counter if item[1] >= threshold]
    print(len(words))
    # Create a vocab wrapper and add some special tokens.
    vocab = Vocabulary()
    vocab.add_word('<pad>')
    vocab.add_word('<start>')
    vocab.add_word('<end>')
    vocab.add_word('<unk>')
    # Add the words to the vocabulary.
    for i, word in enumerate(words):
        vocab.add_word(word)
    return vocab
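# Usage sketch added for illustration; the annotation path and threshold are
# assumptions, not values taken from the original project.
def _example_build_vocab():
    vocab = build_vocab('dataset/VideoQA/annotations.json', threshold=5)
    return vocab  # Vocabulary wrapper with <pad>/<start>/<end>/<unk> plus frequent words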
def evaluation(request, *args, **kwargs):
    if not request.session.get('is_login', False) or request.session['user_type'] != 'accounts':
        return redirect("/index")
    error_msg, page, page_list, projects_dict, project_id = initialize(args, request, True)
    if not project_id:
        return redirect('/account/summary')
    eva_info = utils.get_eva_info(project_id)
    steps = utils.get_steps(project_id)
    step_path = utils.steps_path(utils.get_eva_info(project_id, top=True))
    if request.method == 'GET':
        return render(request, 'user_evaluation.html',
                      {"projects_dict": projects_dict, "project_id": project_id,
                       "eva_info": eva_info, "steps": steps, "step_path": step_path,
                       "page_list": page_list, "page": page, "error_msg": error_msg})
    if request.method == 'POST':
        for num in [1, 2, 3, 4, 5]:
            if request.FILES.get("file" + str(num)):
                with open("database/evaluation.pk", 'rb') as f:
                    eva = pickle.load(f)
                eva[project_id][num - 1] = 1
                with open("database/evaluation.pk", 'wb') as f:
                    f.write(pickle.dumps(eva))
                utils.save_file(project_id, request, "file" + str(num), "evaluation%d" % num)
                return redirect("/account/evaluation&id=%d&page=%d" % (project_id, page))
        error_msg = '未选择文件'  # "No file selected"
        return render(request, 'user_evaluation.html',
                      {"projects_dict": projects_dict, "project_id": project_id,
                       "eva_info": eva_info, 'page_list': page_list, 'page': page,
                       'error_msg': error_msg})
def GET(self, page_name, id):
    access = utils.page_access(page_name, utils.PERM_WRITE)
    if access is not None:
        return access
    try:
        content = utils.fetch_file(page_name)
        try:
            obj = json.loads(content)
            if obj["components"].has_key(id):
                type = obj["components"][id]["type"]
                del obj["components"][id]
                ## Remove from order
                current_order = obj['order'].split(',')
                x = 0
                for i in current_order:
                    if int(i) == int(id):
                        del current_order[x]
                    x = x + 1
                current_order = ",".join(current_order)
                obj['order'] = current_order
                try:
                    utils.save_file(page_name, json.dumps(obj))
                    return utils.callback('{"id":"' + id + '", "type":"' + type + '"}')
                except IOError:
                    utils.handle_error("failed to save file")
            else:
                utils.handle_error("key not found")
        except:
            utils.handle_error("failed to read file")
    except IOError:
        utils.handle_error("file not found")
def on_return(self, task):
    """Called by main thread on the return of data from the workers.
    Post-processing"""
    logger.info('Retrieved task %s', task.tag)
    traj = Session.query(models.Trajectory).get(int(task.tag))
    try:
        # save lh5 version of the trajectory
        conf = load_file(self.project.pdb_topology_file)
        coordinates = msmbuilder.Trajectory.load_trajectory_file(str(traj.dry_xtc_fn), Conf=conf)
        save_file(traj.lh5_fn, coordinates)
    except Exception as e:
        logger.error('When postprocessing %s, convert to lh5 failed!', traj)
        logger.exception(e)
        raise
    # convert last_wet_snapshot to lh5
    pdb_to_lh5(traj, 'last_wet_snapshot_fn')
    pdb_to_lh5(traj, 'init_pdb_fn')
    traj.host = task.host
    traj.returned_time = datetime.now()
    traj.length = len(coordinates)
    logger.info('Finished converting new traj to lh5 successfully')
def main():
    print()
    n_classes = int(sys.argv[1])
    model = int(sys.argv[2])
    dataset_info = open_file(
        'data/sign-to-gloss/cleaned/split-files/dataset-info-' + str(n_classes))
    print('Dataset Info set size: ', len(dataset_info.keys()))
    print()
    train_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/train-phrase-' + str(n_classes))
    val_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/val-phrase-' + str(n_classes))
    test_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/test-phrase-' + str(n_classes))
    print('Training Phrase set size: ', len(train_phrase))
    print('Validation Phrase set size: ', len(val_phrase))
    print('Testing Phrase set size: ', len(test_phrase))
    print()
    batch_size = 50
    vocab_size = n_classes + 2
    parameters = {
        'tar_vocab_size': vocab_size,
        'emb_size': 512,
        'rnn_size': 512,
        'batch_size': batch_size,
        'epochs': 20,
        'train_steps_per_epoch': len(train_phrase) // batch_size,
        'rate': 0.3,
        'val_steps_per_epoch': len(val_phrase) // batch_size,
        'test_steps': len(test_phrase) // batch_size,
        'model': model
    }
    save_file(parameters, 'results/sign-to-gloss/wlasl-' + str(n_classes) +
              '/luong/model_' + str(model) + '/utils/parameters')
    print()
    print('No. of Training steps per epoch: ', parameters['train_steps_per_epoch'])
    print('No. of Validation steps per epoch: ', parameters['val_steps_per_epoch'])
    print('No. of Testing steps: ', parameters['test_steps'])
    print()
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_phrase)).shuffle(len(train_phrase))
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (val_phrase)).shuffle(len(val_phrase))
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_phrase)).shuffle(len(test_phrase))
    test_dataset = test_dataset.batch(batch_size, drop_remainder=True)
    print('Model Training started')
    print()
    # model_training(train_dataset, val_dataset, dataset_info, parameters)
    print('Model Testing started')
    print()
    model_testing(test_dataset, dataset_info, parameters)
def submit(self, traj):
    """
    Submit a job to the work-queue for further sampling.

    Parameters
    ----------
    """
    if traj.submit_time is not None:
        raise ValueError("This traj has already been submitted")
    Session.add(traj)
    Session.flush()
    traj.populate_default_filenames()
    if not hasattr(traj, "init_pdb"):
        raise ValueError("Traj is supposed to have a pdb object tacked on")
    save_file(traj.init_pdb_fn, traj.init_pdb)
    remote_driver_fn = os.path.split(str(traj.forcefield.driver))[1]
    remote_pdb_fn = "input.pdb"
    remote_output_fn = "production_dry{}".format(traj.forcefield.output_extension)
    if traj.mode is None or traj.forcefield is None:
        raise ValueError("malformed traj")
    task = Task(
        "python ./{driver} {pdb_fn} {ff} {water} {mode} {threads}".format(
            pdb_fn=remote_pdb_fn,
            mode=traj.mode,
            driver=remote_driver_fn,
            ff=traj.forcefield.name,
            water=traj.forcefield.water,
            threads=traj.forcefield.threads,
        )
    )
    # why does traj.forcefield.driver come out as unicode?
    task.specify_input_file(str(traj.forcefield.driver), remote_driver_fn)
    task.specify_output_file(traj.wqlog_fn, "logs/driver.log")
    task.specify_input_file(traj.init_pdb_fn, remote_pdb_fn)
    task.specify_output_file(traj.dry_xtc_fn, remote_output_fn)
    if self.return_wet_xtc:
        # this is the XTC file with waters, generated by the driver;
        # when you're doing implicit solvent only, this stuff is not used.
        remote_wet_output_fn = "production_wet{}".format(traj.forcefield.output_extension)
        task.specify_output_file(traj.wet_xtc_fn, remote_wet_output_fn)
        task.specify_output_file(traj.last_wet_snapshot_fn, "last_wet_snapshot.pdb")
    else:
        logger.debug("Not requesting production_wet%s from driver (implicit)",
                     traj.forcefield.output_extension)
    task.specify_tag(str(traj.id))
    task.specify_algorithm(WORK_QUEUE_SCHEDULE_FILES)  # what does this do?
    traj.submit_time = datetime.now()
    # need to do a commit from the qmaster, since this is a different session
    Session.commit()
    self.wq.submit(task)
    logger.info("Submitted to queue: %s", traj)
def preload_vocabs():
    vocabs = dict()
    load_file(vocabs, 'train-v1.1.json')
    load_file(vocabs, 'dev-v1.1.json')
    u.save_file('%s/%s' % (folder, 'vocabs.pkl'), vocabs)
    u.save_file('%s/%s' % (folder, 'vocabs_text.txt'),
                convert_vocab_to_text(vocabs), use_pickle=False)
def main(args):
    encoding = 'utf-8'
    # Load content of the file with all pairs.
    loaded_pairs = list(load_pairs(args.list, encoding=encoding))
    # Reduce pairs -> concatenate pair + complement
    new_pairs_gen = reduce_pairs(loaded_pairs)
    # Save into output file
    save_file(args.output, new_pairs_gen, encoding=encoding)
def main(args):
    content_generator = load_file(args.transcript, encoding=args.encoding)
    rules = load_rules(args.rules, encoding=args.encoding)
    mapped = list(do_mapping(content_generator, rules))
    formatted = format_data(mapped)
    save_file(args.output, formatted, encoding=args.encoding)
def download_oneday(self, relpath, dateobj):
    dateurl = urllib.basejoin(self.baseurl, '/hcjudge/date_output.php')
    postdata = [('d1', dateobj.day), ('m1', dateobj.month),
                ('y1', dateobj.year), ('d2', dateobj.day),
                ('m2', dateobj.month), ('y2', dateobj.year),
                ('button', 'Submit')]
    webpage = self.download_url(dateurl, postdata=postdata)
    if not webpage:
        self.logger.warning(u'No webpage for %s date: %s' % (dateurl, dateobj))
        return []
    d = utils.parse_webpage(webpage)
    if not d:
        self.logger.error(u'HTML parsing failed for date: %s' % dateobj)
        return []
    newdls = []
    for link in d.findAll('a'):
        href = link.get('href')
        title = utils.get_tag_contents(link)
        if (not href) or (not title):
            self.logger.warning(u'Could not process %s' % link)
            continue
        words = href.split('/')
        filename = words[-1]
        url = urllib.basejoin(dateurl, href)
        self.logger.info(u'link: %s title: %s' % (href, title))
        relurl = os.path.join(relpath, filename)
        filepath = os.path.join(self.rawdir, relurl)
        metapath = os.path.join(self.metadir, relurl)
        if not os.path.exists(filepath):
            webpage = self.download_url(url)
            if not webpage:
                self.logger.warning(u'No webpage %s' % url)
            else:
                utils.save_file(filepath, webpage)
                self.logger.info(u'Saved %s' % url)
                newdls.append(relurl)
        if os.path.exists(filepath) and \
                (self.updateMeta or not os.path.exists(metapath)):
            metainfo = self.get_meta_info(title, dateobj)
            if metainfo:
                utils.print_tag_file(metapath, metainfo)
    return newdls
def download_order(self, relpath, dateobj, metainfo, onclick):
    reobj = re.search('myfunViewDownLoad\s*\(\s*"(?P<ccin>\d+)"\s*,\s*"(?P<orderno>\d+)"\s*,\s*"(?P<flag>\w+)"\s*,\s*"(?P<casedetail>.+)"\s*,\s*"\w+"', onclick)
    if not reobj:
        self.logger.warning(u'Could not get parameters in onclick: %s' % onclick)
        return None
    groupdict = reobj.groupdict()
    ccin = groupdict['ccin']
    orderno = groupdict['orderno']
    flag = groupdict['flag']
    casedetail = groupdict['casedetail']
    metainfo['caseno'] = casedetail
    filename = self.get_filename(casedetail)
    if not filename:
        self.logger.warning(u'Could not get filename from %s' % casedetail)
        return None
    datestr = dateobj.__str__()
    utils.mk_dir(os.path.join(self.rawdir, self.name, datestr))
    utils.mk_dir(os.path.join(self.metadir, self.name, datestr))
    relurl = os.path.join(relpath, datestr, filename)
    filepath = os.path.join(self.rawdir, relurl)
    metapath = os.path.join(self.metadir, relurl)
    if os.path.exists(filepath):
        self.logger.warning(u'Raw file already exists, skipping: %s ' % relurl)
    else:
        # ccin_no=001016200801769&order_no=2&flag=v&casedetail=MISC.CIVIL+APPLICATION%2F1769%2F2008&download_token_value_id=1367853726545
        self.logger.info(u'Downloading %s' % relurl)
        postdata = [('ccin_no', ccin), ('order_no', orderno),
                    ('flag', flag), ('casedetail', casedetail),
                    ('download_token_value_id', int(time.time()))]
        webpage = self.download_url(self.orderurl,
                                    referer=self.caseurl,
                                    loadcookies=self.cookiefile.name,
                                    postdata=postdata)
        if webpage:
            self.logger.info(u'Saving %s' % filepath)
            utils.save_file(filepath, webpage)
        else:
            self.logger.warning(u'Could not download ccin: %s number: %s ' % (ccin, orderno))
    if os.path.exists(filepath) and metainfo and \
            (self.updateMeta or not os.path.exists(metapath)):
        self.logger.info(u'Metainfo: %s' % metainfo)
        utils.print_tag_file(metapath, metainfo)
    if os.path.exists(filepath):
        return relurl
    return None
def get_judgment(self, link, filepath):
    url = urllib.basejoin(self.courturl, link)
    self.logger.info(u'Downloading link %s' % url)
    webpage = self.download_url(url, loadcookies=self.cookiefile.name)
    if webpage:
        utils.save_file(filepath, webpage)
        return True
    else:
        return False
def download_judgment(self, link, filepath):
    url = urllib.basejoin(self.dateurl, link)
    self.logger.info(u'Downloading link %s' % url)
    webpage = self.download_url(url, loadcookies=self.cookiefile.name)
    if webpage:
        utils.save_file(filepath, webpage)
        return True
    else:
        return False
def update_versions(region):
    for OS in ["android", "ios", "macos", "windows"]:
        releases = session.get(
            f"https://sieve.services.riotcdn.net/api/v1/products/lol/version-sets/{region}?q[platform]={OS}",
            timeout=1)
        releases.raise_for_status()
        for release in json.loads(releases.content)["releases"]:
            path = f'{"LoL" if OS == "macos" or OS == "windows" else "TFT"}/{region}/{OS}/{release["release"]["labels"]["riot:artifact_type_id"]["values"][0]}'
            os.makedirs(path, exist_ok=True)
            save_file(f'{path}/{release["release"]["labels"]["riot:artifact_version_id"]["values"][0].split("+")[0]}.txt',
                      release["download"]["url"])
def build_embedding():
    vocabs = utils.load_file(vocabs_path, use_pickle=False)
    word_embedding = utils.load_glove()
    embedding = []
    for w in vocabs:
        w = w.replace('\n', '')
        if w in word_embedding:
            embedding.append(word_embedding[w])
    utils.save_file(embedding_path, embedding)
def datequery_result(self, webpage, relpath, pagenum, dateobj):
    downloaded = []
    d = utils.parse_webpage(webpage)
    if not d:
        self.logger.error(u'Could not parse html of the result page for date %s' % dateobj)
        return downloaded
    stateval = self.extract_state(d)
    if stateval != None and stateval != self.stateval:
        self.stateval = stateval
        self.logger.info(u'stateval changed')
    linkdict = self.extract_links(d, pagenum)
    for link in linkdict['docs']:
        if (not link.has_key('title')) or (not link.has_key('href')):
            continue
        self.logger.info(u'Processing link: %s href: %s' %
                         (link['title'], link['href']))
        filename = re.sub('/', '|', link['title'])
        filename = re.sub("'", ' ', filename)
        tmprel = os.path.join(relpath, filename)
        rawpath = os.path.join(self.rawdir, tmprel)
        metapath = os.path.join(self.metadir, tmprel)
        if not os.path.exists(rawpath):
            webpage = self.download_judgment(link)
            if webpage:
                utils.save_file(rawpath, webpage)
            else:
                self.logger.warning(u'Could not download %s' % link['title'])
        if os.path.exists(rawpath) and not os.path.isdir(rawpath):
            if not os.path.exists(metapath) or self.updateMeta:
                self.save_meta_tags(metapath, link, dateobj)
            downloaded.append(tmprel)
    if linkdict.has_key('next'):
        link = linkdict['next']
        self.logger.info(u'Following page: %s href: %s' %
                         (link['title'], link['href']))
        webpage = self.download_link(link)
        if webpage:
            nextdownloads = self.datequery_result(webpage, relpath,
                                                  pagenum + 1, dateobj)
            downloaded.extend(nextdownloads)
        else:
            self.logger.warning(u'Could not download %s' % link['title'])
    return downloaded
def save_wallet(path: str, wallet: dict) -> None:
    """
    Save wallet to the defined path. The file will be saved as .json.
    :param path:
    :param wallet:
    :return:
    """
    data: str = json.dumps(wallet, indent=4)
    save_file(path, f"{wallet['address']}.json", data)
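# Usage sketch added for illustration; the wallet fields and target directory
# are placeholders, not values from the original project.
def _example_save_wallet():
    wallet = {"address": "abc123", "public_key": "...", "balance": 0}
    save_wallet("wallets", wallet)  # writes wallets/abc123.json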
def save_patterns(self):
    if False:  # saving is currently disabled in the original code
        string = ""
        keys = list(self.patterns.keys())
        keys.sort()
        for k in keys:
            string += k + " = " + self.patterns[k] + "\n"
        utils.save_file(self.path, string)
def main(args):
    content_generator = load_file(args.transcript, encoding=args.encoding)
    rules = load_rules(args.rules, encoding=args.encoding)
    mapped = do_mapping(content_generator, rules)
    cleaned = clean(mapped)
    formatted = mlf_format_data(cleaned)
    save_file(args.output, formatted, encoding=args.encoding)
def datequery_result(self, webpage, relpath, pagenum, dateobj):
    downloaded = []
    d = utils.parse_webpage(webpage)
    if not d:
        self.logger.error(u'Could not parse html of the result page for date %s' % dateobj)
        return downloaded
    stateval = self.extract_state(d)
    if stateval != None and stateval != self.stateval:
        self.stateval = stateval
        self.logger.info(u'stateval changed')
    linkdict = self.extract_links(d, pagenum)
    for link in linkdict['docs']:
        if (not link.has_key('title')) or (not link.has_key('href')):
            continue
        self.logger.info(u'Processing link: %s href: %s' %
                         (link['title'], link['href']))
        filename = re.sub('/', '|', link['title'])
        filename = re.sub("'", ' ', filename)
        tmprel = os.path.join(relpath, filename)
        rawpath = os.path.join(self.rawdir, tmprel)
        metapath = os.path.join(self.metadir, tmprel)
        if not os.path.exists(rawpath):
            webpage = self.download_link(link)
            if webpage:
                utils.save_file(rawpath, webpage)
            else:
                self.logger.warning(u'Could not download %s' % link['title'])
        if os.path.exists(rawpath) and not os.path.isdir(rawpath):
            if not os.path.exists(metapath) or self.updateMeta:
                self.save_meta_tags(metapath, link, dateobj)
            downloaded.append(tmprel)
    if linkdict.has_key('next'):
        link = linkdict['next']
        self.logger.info(u'Following page: %s href: %s' %
                         (link['title'], link['href']))
        webpage = self.download_link(link)
        if webpage:
            nextdownloads = self.datequery_result(webpage, relpath,
                                                  pagenum + 1, dateobj)
            downloaded.extend(nextdownloads)
        else:
            self.logger.warning(u'Could not download %s' % link['title'])
    return downloaded
def main(args):
    encoding = 'utf-8'
    # Load output of AP recognizer
    mlf_content = load_mlf(args.mlf, clean=False, encoding=encoding)
    csv_content = list(load_csv(args.csv, encoding=encoding))
    # Process data and create content for output csv
    p_content = process(mlf_content, csv_content, isolated=args.isolated)
    # Save files
    save_file(args.output, p_content, encoding=encoding)
def main(args):
    encoding = 'utf-8'
    # Make scp
    scp_content_gen = make_scp(args.htkdir, [args.f1, args.f2])
    save_file(args.outscp, scp_content_gen, encoding=encoding)
    # Make vocab
    dict_content = list(load_dict(args.dict, encoding=encoding))
    vocab_gen = make_vocab(dict_content, args.w1, args.w2)
    save_file(args.outvocab, vocab_gen, encoding=encoding)
def main():
    parser = get_parser()
    args = parser.parse_args()
    if args.doc:
        print __doc__
        sys.exit()
    g = geosearchclass.GeoSearchClass()
    if args.params:
        print 'Using parameters from ' + str(args.params)
        # turn parameter file into dictionary
        g.set_params_from_file(args.params)
    if args.address:
        print "Finding geocoordinates for address:\n{}".format(args.address)
        coords = geo_converter.get_geocoords_from_address(args.address)
        if coords:
            g.latitude = coords[0]
            print "Found this latitude:"
            print g.latitude
            g.longitude = coords[1]
            print "Found this longitude:"
            print g.longitude
        else:
            print "Failed to find coordinates. Exiting."
            sys.exit()
    if args.input:
        text = utils.load_file(args.input)
        tokens = utils.tokenize_normal_words(text)
        for_poem = utils.filter_words(tokens)
    else:
        for_poem = get_default_words()
    if args.markov:
        if args.input:
            raise StandardError("Can only input a single text file. \
use --markov <your_text_file.txt>")
        else:
            text = utils.load_file(args.markov)
            # ngram = ngrams.make_ngram(text, 2)
            ngram = ngrams.make_bigram_trigram_dictionary(text)
            formatted_poem = create_poem(g, for_poem, ngram)
    else:
        formatted_poem = create_poem(g, for_poem)
    if args.output:
        print '\nwriting formatted poem to ' + str(args.output)
        output_file = args.output
    else:
        print "\nwriting formatted poem to poem.txt"
        output_file = "poem.txt"
    utils.save_file(output_file, formatted_poem)
def main(args):
    encoding = 'utf-8'
    # Load output of AP recognizer
    mlf_content = load_mlf(args.mlf, clean=False, encoding=encoding)
    # Get important data from loaded content
    processed = process_data(mlf_content, args.isolated)
    # Convert data into csv content
    csv_data = content2csv(processed)
    # Save files
    save_file(args.output, csv_data, encoding=encoding)
def get_judgment(self, link, filepath):
    url = urllib.basejoin(self.baseurl, link)
    webpage = self.download_url(url)
    if webpage:
        self.logger.info(u'Successfully downloaded %s' % url)
        utils.save_file(filepath, webpage)
        return True
    else:
        self.logger.warning(u'Got empty page for %s' % url)
        return False
def parse(self):
    super().parse()
    self.processed = self.packet_size
    if self.is_code_type(FTP_TRANSFER_COMPLETE) and FTPParser.current_file_name is not None:
        fullpath = form_path(self.source_ip, self.destination_ip, FTPParser.current_file_name)
        save_file(fullpath, FTPParser.file, True)
        FTPParser.current_file_name = None
        FTPParser.file = b""
    if FTPParser.current_file_name is not None:
        FTPParser.file += self.data
    elif self.is_code_type(FTP_TRANSFER_START):
        self.set_current_file_name()
def upload(rdb):
    name = request.forms.name
    filedata = request.files.file
    description = request.forms.description
    if name and filedata != u'' and filedata.file:
        id = rdb.incr('file:global:last-id')
        filename = filedata.filename
        filelen = save_file(filedata.file, "%s.%s" % (id, filename))
        # writing filedata to redis
        data = dict(
            id=id,
            name=name,
            filename=filename,
        )
        if description:
            data.update(description=description)
        rdb.hmset('file:%s:data' % id, data)
        rdb.rpush('file:global:ids', id)
        return {
            'filename': filename,
            'filelen': filelen,
            'fileid': id,
            'file_loaded': True,
            'url': uploader.get_url,
        }
    return {'file_loaded': False}
def edit_person(slug):
    try:
        p = Person.objects.get(site=g.site.domain, slug=slug)
    except Person.DoesNotExist:
        abort(404)
    if not g.site.domain == g.user:
        abort(403)
    if request.method == "POST":
        reqfile = request.files.get('file')
        if reqfile:
            f = File()
            f.site = g.site.domain
            f.name = reqfile.filename
            f.content_type = reqfile.mimetype
            f.slug, f.content_length = save_file(reqfile, request.config["UPLOAD_FOLDER"])
            f.save()
        p.name = request.form.get("name")
        p.title = request.form.get("title")
        p.phone = request.form.get("phone")
        p.email = request.form.get("email")
        p.twitter = request.form.get("twitter")
        p.description = request.form.get("description")
        if reqfile:
            p.image_slug = f.slug
        p.save()
        return redirect(url_for(".show_person", slug=p.slug))
    return render_template("edit_person.html", person=p)
def edit_post(year, month, day, slug):
    try:
        p = Post.objects.get(site=g.site.domain, year=year, month=month, day=day, slug=slug)
    except Post.DoesNotExist:
        abort(404)
    if not g.site.domain == g.user:
        abort(403)
    if request.method == "POST":
        reqfile = request.files.get("file")
        if reqfile:
            f = File()
            f.site = g.site.domain
            f.name = reqfile.filename
            f.content_type = reqfile.mimetype
            f.slug, f.content_length = save_file(reqfile, current_app.config["UPLOAD_FOLDER"])
            f.save()
        p.name = request.form.get("name")
        p.text = request.form.get("text")
        if reqfile:
            p.image_slug = f.slug
        p.save()
        return redirect(url_for("post", year=p.year, month=p.month, day=p.day, slug=p.slug))
    return render_template("edit_post.html", post=p)
def set_current_file_name(self):
    current_name = self.starting_line.split(' ')[1]
    if '?' in current_name:
        current_name = current_name.split('?')[0]
    if current_name.isnumeric():
        return
    if HTTPParser.current_file_name is not None and HTTPParser.current_file_name != current_name:
        fullpath = form_path(self.source_ip, self.destination_ip, HTTPParser.current_file_name)
        save_file(fullpath, HTTPParser.current_file)
        HTTPParser.current_file = ''
    HTTPParser.current_file_name = current_name
    HTTPParser.current_src_ip = self.source_ip
    HTTPParser.current_dest_ip = self.destination_ip
def add_event():
    def _dup(l):
        return [(i, i) for i in l]

    session = create_session()
    form = EventForm(request.forms.decode())
    form.continent.choices = _dup(CONTINENTS)
    # countries = session.query(Country)  # XXX: why is countries never used?
    # form.country.choices = [(c.id, c.name) for c in countries.all()]
    # form.gender.choices = _dup(["male", "female", "all"])
    if form.validate():
        session = create_session()
        new_task = Event.from_form(form)
        try:
            for attachment in scan_attachments(request.files):
                print "saving attachment: ", attachment, attachment.filename
                new_task.attachments.append(Attachment(
                    file_path=save_file(attachment),
                    file_name=attachment.filename,
                    file_type=attachment.content_type))
            session.add(new_task)
            session.commit()
            return redirect("/")
        except InvalidFileUpload as e:
            form.errors['attachment'] = e.message
    print dict(request.forms)
    print form.errors
    print type(form.errors)
    return new(errors=form.errors)
def download_debate(self, rel, dlurl, doc, dateobj):
    rawpath = os.path.join(self.rawdir, rel)
    metapath = os.path.join(self.metadir, rel)
    if os.path.exists(rawpath):
        return True
    else:
        webpage = self.download_url(dlurl)
        if webpage:
            utils.save_file(rawpath, webpage)
            self.logger.info(u'Saved %s' % rawpath)
            if os.path.exists(rawpath) and (self.updateMeta or not os.path.exists(metapath)):
                self.save_meta_tags(metapath, doc, dateobj)
            return True
        else:
            self.logger.warning(u'Could not download ' + dlurl)
            return False
def GET(self, page_name):
    access = utils.page_access(page_name, utils.PERM_WRITE)
    if access is not None:
        return access
    data = web.input(order="")
    try:
        content = utils.fetch_file(page_name)
        try:
            obj = json.loads(content)
            obj["order"] = data.order
            try:
                utils.save_file(page_name, json.dumps(obj))
                return utils.callback('{"success":"1"}')
            except IOError:
                utils.handle_error("failed to save file")
        except:
            utils.handle_error("failed to read file")
    except IOError:
        utils.handle_error("file not found")
def parse(self):
    super().parse()
    self.processed = self.packet_size
    if self.sender is not None:
        if self.is_end_message():
            POP3Parser.message += self.data
            file_name = self.sender + " " + self.subject + " " + self.date
            fullpath = form_path(self.source_ip, self.destination_ip, file_name)
            save_file(fullpath, self.create_letter())
            self.sender = None
        else:
            POP3Parser.message += self.data
    if self.is_start_message():
        self.parse_sender()
        self.parse_receiver()
        self.parse_date()
        self.parse_subject()
        self.slice_data(b'Encoding: quoted-printable:')
        self.slice_data(b'\r\n\r\n')
        POP3Parser.message = self.data