def compute_p_word_given_class(data_paths, vocab_size):
    """
    Return a dictionary of word probabilities, P(word | class).
    All data paths belong to the same class.
    Incorporate Laplace smoothing with k=1 here.
    p_word_given_class should include the probability of UNKNOWN_WORD,
    i.e. any word that doesn't appear in the training set.
    """
    p_word_given_class = dict()

    # Compute the total number of words in the given class.
    class_size = 0
    for path in data_paths:
        message = open_file(path)
        words = get_words(message)
        class_size += len(words)

    # Add smoothed probabilities to the dictionary.
    for path in data_paths:
        message = open_file(path)
        words = get_words(message)
        for word in words:
            if word in p_word_given_class:
                p_word_given_class[word] += 1 / (class_size + vocab_size + 1)
            else:
                # First occurrence: count of 1 plus the k=1 smoothing term.
                p_word_given_class[word] = 2 / (class_size + vocab_size + 1)

    p_word_given_class['UNKNOWN_WORD'] = 1 / (class_size + vocab_size + 1)
    return p_word_given_class

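# A minimal usage sketch (not part of the original file): scoring one message with the
# dictionaries returned above. The helper name classify_message, the prior arguments,
# and the log-probability scoring are assumptions for illustration only; open_file and
# get_words are the same helpers used by compute_p_word_given_class.
import math

def classify_message(path, p_word_spam, p_word_ham, prior_spam=0.5, prior_ham=0.5):
    words = get_words(open_file(path))
    score_spam = math.log(prior_spam)
    score_ham = math.log(prior_ham)
    for word in words:
        # Words unseen in training fall back to the UNKNOWN_WORD probability.
        score_spam += math.log(p_word_spam.get(word, p_word_spam['UNKNOWN_WORD']))
        score_ham += math.log(p_word_ham.get(word, p_word_ham['UNKNOWN_WORD']))
    return 'spam' if score_spam > score_ham else 'ham'
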
def main(): """Prints a diff between two files.""" parser = optparse.OptionParser(usage='usage: %prog file-a file-b [options]') parser.add_option('--text-is-case-insensitive', action='store_false', dest='text_is_case_sensitive', default=True, help='<pfif:full_name>Jane</pfif:full_name> is the same as ' '<pfif:full_name>JANE</pfif:full_name>') parser.add_option('--no-grouping', action='store_false', default=True, dest='group_by_record_id', help='Rather than grouping all differences pertaining to ' 'the same record together, every difference will be ' 'displayed individually.') parser.add_option('--ignore-field', action='append', dest='ignore_fields', default=[], help='--ignore-field photo_url will mean that ' 'there will be no messages for photo_url fields that are ' 'added, removed, or changed. To specify multiple fields ' 'to ignore, use this flag multiple times.') parser.add_option('--omit-blank-fields', action='store_true', default=False, help='Normally, a blank field (ie, <foo></foo>) will count ' 'as a different against a file that does not have that ' 'field at all. If you pass this flag, a blank field will ' 'count as an omitted field.') (options, args) = parser.parse_args() assert len(args) >= 2, 'Must provide two files to diff.' messages = pfif_file_diff( utils.open_file(args[0]), utils.open_file(args[1]), text_is_case_sensitive=options.text_is_case_sensitive, ignore_fields=options.ignore_fields, omit_blank_fields=options.omit_blank_fields) print(utils.MessagesOutput.generate_message_summary(messages)) if options.group_by_record_id: print(utils.MessagesOutput.messages_to_str_by_id(messages)) else: print(utils.MessagesOutput.messages_to_str(messages))
def btn_click(self, event):
    if not os.path.exists(self.icon_path):
        return
    ext = str(Path(self.icon_path).suffix)
    save_path = self.icon_save_path + ext
    shutil.copy(self.icon_path, save_path)
    utils.open_file(save_path, True)

def dfc2018_loader(folder):
    img = open_file(folder + '2018_IEEE_GRSS_DFC_HSI_TR.HDR')[:, :, :-2]
    gt = open_file(folder + '2018_IEEE_GRSS_DFC_GT_TR.tif')
    gt = gt.astype('uint8')
    rgb_bands = (47, 31, 15)
    label_values = ["Unclassified", "Healthy grass", "Stressed grass",
                    "Artificial turf", "Evergreen trees", "Deciduous trees",
                    "Bare earth", "Water", "Residential buildings",
                    "Non-residential buildings", "Roads", "Sidewalks",
                    "Crosswalks", "Major thoroughfares", "Highways",
                    "Railways", "Paved parking lots", "Unpaved parking lots",
                    "Cars", "Trains", "Stadium seats"]
    ignored_labels = [0]
    palette = None  # Added so the caller does not raise an error.
    return img, gt, rgb_bands, ignored_labels, label_values, palette

def __init__(self, src_path, tgt_path=None):
    super(NMTDataset, self).__init__()
    self.src = open_file(src_path)
    self.tgt = None
    if tgt_path is not None:
        self.tgt = open_file(tgt_path)
    self.size = len(self.src)

def read_features(pos_fname, neg_fname, chrom_lengths, bin_size):
    echo('Reading features:', pos_fname, neg_fname)
    features = dict((chrom, [SKIP] * chrom_lengths[chrom]) for chrom in chrom_lengths)
    with open_file(pos_fname) as in_f:
        for l in in_f:
            chrom, start, end = l.strip().split('\t')[:3]
            start_bin = int(start) / bin_size
            end_bin = int(end) / bin_size
            if chrom not in features:
                features[chrom] = []
            for bin_i in xrange(start_bin, end_bin + 1):
                features[chrom][bin_i] = POS
    if neg_fname is not None:
        with open_file(neg_fname) as in_f:
            for l in in_f:
                chrom, start, end = l.strip().split('\t')[:3]
                start_bin = int(start) / bin_size
                end_bin = int(end) / bin_size
                if chrom not in features:
                    features[chrom] = []
                for bin_i in xrange(start_bin, end_bin + 1):
                    features[chrom][bin_i] = NEG
    else:
        for chrom in features:
            for bin_i in xrange(len(features[chrom])):
                if features[chrom][bin_i] != POS:
                    features[chrom][bin_i] = NEG
    return features

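# Worked example (illustration only, not from the original file): with bin_size = 200,
# a BED line "chr1\t950\t1300" yields start_bin = 950 / 200 = 4 and
# end_bin = 1300 / 200 = 6 under Python 2 integer division, so bins 4, 5 and 6 of
# features['chr1'] are set to POS; with no neg_fname, every remaining non-POS bin
# is then marked NEG.
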
def main():
    print()
    model = int(sys.argv[1])
    file_name = sys.argv[2]
    n_classes = int(sys.argv[3])
    class_list = text_retrieve('class_list.txt')
    val_phrase = open_file('data/sign-to-gloss/cleaned/split-files/' + file_name
                           + '-phrase-' + str(n_classes))
    dataset_info = open_file(
        'data/sign-to-gloss/cleaned/split-files/dataset-info-' + str(n_classes))
    tar_lines, pred_lines = [], []
    for i in range(0, len(val_phrase)):
        print(i)
        inp, tar = create_batch([val_phrase[i]], dataset_info, n_classes)
        pred = translate(inp, model, n_classes)
        tar, pred = convert_tar_pred(list(tar[0][1:-1]), pred, class_list)
        print('Target phrase: ', tar)
        print('Predict phrase: ', pred)
        print()
        tar_lines.append(tar)
        pred_lines.append(pred)
    tar_text = lines_to_text(tar_lines, '\n')
    pred_text = lines_to_text(pred_lines, '\n')
    text_save(tar_text, str(n_classes) + '/luong/model_' + str(model)
              + '/predictions/' + file_name + '_tar.txt')
    text_save(pred_text, str(n_classes) + '/luong/model_' + str(model)
              + '/predictions/' + file_name + '_pred.txt')

def edit_dataset(src_path, tgt_path):
    x = open_file(src_path)
    y = open_file(tgt_path)
    i = 0
    residual = []
    with open("./nep_dataset/src.txt", "w", encoding="utf8") as f:
        for sent in x:
            i += 1
            if len(sent.strip()) < 6:
                residual.append(i)
                continue
            # Append the Devanagari danda ('।') if the Nepali sentence has no
            # terminal punctuation.
            if sent.strip()[-1] not in ['।', '?', '!']:
                f.write(sent + '।\n')
            else:
                f.write(sent + '\n')
    j = 0
    with open("./nep_dataset/tgt.txt", "w", encoding="utf8") as f:
        for sent in y:
            j += 1
            if j in residual:
                continue
            if sent.strip()[-1] not in ['.', '!', '?']:
                f.write(sent + '.\n')
            else:
                f.write(sent + '\n')

def __get_login_qrcode(self):
    """Fetch the login QR code and open it automatically.

    https://qr.m.jd.com/show?appid=133&size=147&t=1611304511060

    return: boolean
    """
    url = "https://qr.m.jd.com/show"
    payload = {
        "appid": 133,
        "size": 147,
        "t": utils.get_current_json_timestamp()
    }
    r = self.session.get(url=url, params=payload, headers=self.headers)
    try:
        r.raise_for_status()
    except requests.HTTPError:
        logging.error(f"Failed to fetch the login QR code ({r.status_code}): {r.text}")
        return False
    qrcode_name = path("qrcode.png").abs_path_str()
    utils.dumps_bytes_to_file(r, qrcode_name)
    utils.open_file(qrcode_name)
    logging.info(f"Login QR code saved to: {qrcode_name}")
    logging.info("Login QR code opened; please scan it with the JD app to log in!")
    return True

def get_fixed_sets(run, folder_name, dataset_name, mode='both'):
    """
    Load the fixed training and test sets from file.

    Arguments:
        run - number of the current run
        folder_name - folder with the files
        dataset_name - name of the loaded dataset
        mode - 'both': train and test set, 'train': only training set,
               'test': only test set
    """
    if mode in ('both', 'train'):
        train_gt = open_file(folder_name + dataset_name + '/train_gt_' + str(run) + '.npz')
        if mode == 'train':
            return train_gt
    if mode in ('both', 'test'):
        test_gt = open_file(folder_name + dataset_name + '/test_gt_' + str(run) + '.npz')
        if mode == 'test':
            return test_gt
        else:
            return train_gt, test_gt
    else:
        print('Wrong name of the mode parameter!')

def main():
    print()
    n_classes = int(sys.argv[1])
    model = int(sys.argv[2])
    dataset_info = open_file(
        'data/sign-to-gloss/cleaned/split-files/dataset-info-' + str(n_classes))
    print('Dataset Info set size: ', len(dataset_info.keys()))
    print()
    train_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/train-phrase-' + str(n_classes))
    val_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/val-phrase-' + str(n_classes))
    test_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/test-phrase-' + str(n_classes))
    print('Training Phrase set size: ', len(train_phrase))
    print('Validation Phrase set size: ', len(val_phrase))
    print('Testing Phrase set size: ', len(test_phrase))
    print()
    batch_size = 50
    vocab_size = n_classes + 2
    parameters = {
        'tar_vocab_size': vocab_size,
        'emb_size': 512,
        'rnn_size': 512,
        'batch_size': batch_size,
        'epochs': 20,
        'train_steps_per_epoch': len(train_phrase) // batch_size,
        'rate': 0.3,
        'val_steps_per_epoch': len(val_phrase) // batch_size,
        'test_steps': len(test_phrase) // batch_size,
        'model': model
    }
    save_file(parameters, 'results/sign-to-gloss/wlasl-' + str(n_classes)
              + '/luong/model_' + str(model) + '/utils/parameters')
    print()
    print('No. of Training steps per epoch: ', parameters['train_steps_per_epoch'])
    print('No. of Validation steps per epoch: ', parameters['val_steps_per_epoch'])
    print('No. of Testing steps: ', parameters['test_steps'])
    print()
    train_dataset = tf.data.Dataset.from_tensor_slices(train_phrase).shuffle(len(train_phrase))
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    val_dataset = tf.data.Dataset.from_tensor_slices(val_phrase).shuffle(len(val_phrase))
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)
    test_dataset = tf.data.Dataset.from_tensor_slices(test_phrase).shuffle(len(test_phrase))
    test_dataset = test_dataset.batch(batch_size, drop_remainder=True)
    print('Model Training started')
    print()
    #model_training(train_dataset, val_dataset, dataset_info, parameters)
    print('Model Testing started')
    print()
    model_testing(test_dataset, dataset_info, parameters)

def convertData(data_files, modelname, posorneg):
    toRet = []
    toRet2 = []
    if modelname == "naive":
        for filename in data_files:
            vector = []
            raw_data = open_file(filename)
            tokenized_data = word_tokenize(raw_data.lower())
            length = 1 * len(tokenized_data) - 1
            for word in bowvectorsource:
                # presence
                if word in tokenized_data:
                    vector.append(1)
                else:
                    vector.append(0)
                # normalized frequency (disabled)
                sum = 0
                # for tokword in tokenized_data:
                #     if word == tokword:
                #         sum += 1 / length
                # vector.append(sum)
            for i, word in enumerate(bowvectorsource):
                if word in tokenized_data:
                    vector.append(1)
                else:
                    vector.append(0)
                # for tokword in tokenized_data:
                #     if word == tokword:
                #         sum += 1 / length
            toRet.append(vector)
            toRet2.append(posorneg)
    elif modelname[:6] == "concat":
        epoch = modelname[6:]
        model1 = Doc2Vec.load("dbow_" + epoch + "e.model")
        model2 = Doc2Vec.load("dm_" + epoch + "e.model")
        for filename in data_files:
            raw_data = open_file(filename)
            # print(raw_data)
            tokenized_data = word_tokenize(raw_data.lower())
            # print(tokenized_data)
            vector1 = model1.infer_vector(tokenized_data)
            vector2 = model2.infer_vector(tokenized_data)
            toRet.append(vector1 + vector2)
            toRet2.append(posorneg)
    else:
        model = Doc2Vec.load(modelname)
        for filename in data_files:
            raw_data = open_file(filename)
            # print(raw_data)
            tokenized_data = word_tokenize(raw_data.lower())
            # print(tokenized_data)
            vector = model.infer_vector(tokenized_data)
            toRet.append(vector)
            toRet2.append(posorneg)
    return toRet, toRet2

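# A minimal usage sketch (not part of the original file): convertData returns parallel
# lists of feature vectors and labels, which can feed any scikit-learn estimator.
# The file lists pos_files/neg_files, the function name train_sentiment_model, and the
# choice of LogisticRegression are assumptions for illustration only.
from sklearn.linear_model import LogisticRegression

def train_sentiment_model(pos_files, neg_files, modelname):
    X_pos, y_pos = convertData(pos_files, modelname, 1)
    X_neg, y_neg = convertData(neg_files, modelname, 0)
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X_pos + X_neg, y_pos + y_neg)  # list concatenation gives one training set
    return clf
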
def build_file(filename, outfilename, root=u'.', create_dir=True):
    filepath = os.path.join(root, filename)
    with utils.open_file(filepath) as infile:
        try:
            output = tags.render(infile.read(), filename=filename, rootdir=root)
        except templatelang.ParseBaseException as e:
            utils.print_parse_exception(e, filename)
            return
    with utils.open_file(outfilename, "w", create_dir=create_dir) as outfile:
        outfile.write(output)

def gen_anchors(self, output_path, num_clusters):
    """
    Generate anchors, which tell YOLO at which scales the objects that need to be
    trained and detected occur.

    :param output_path: path where the YOLO output files should be stored
    :param num_clusters: number of clusters that need to be found
    :return: string of all generated anchors rounded to 2 decimals
    """
    np.seterr(divide='ignore', invalid='ignore')
    train_txt_path = os.path.join(output_path, self.TRAIN_TXT_FILENAME)
    f = utils.open_file(train_txt_path)
    lines = [line.rstrip('\n') for line in f.readlines()]
    annotation_dims = []
    for line in lines:
        line = line.replace('JPEGImages', 'labels')
        line = line.replace('.jpg', '.txt')
        line = line.replace('.png', '.txt')
        # Extract filenames out of 'train.txt'.
        yolo_img_txt_filename = os.path.join(output_path, line.split('/')[-1])
        f2 = utils.open_file(yolo_img_txt_filename)
        for line in f2.readlines():
            line = line.rstrip('\n')
            w, h = line.split(' ')[3:]
            annotation_dims.append(tuple(map(float, (w, h))))
    annotation_dims = np.array(annotation_dims)

    if num_clusters == 0:
        for num_clusters_ in range(1, 11):  # we make 1 through 10 clusters
            anchors_str = None
            # kmeans returns None if no anchors are found; retry with new random indices.
            while anchors_str is None:
                indices = [random.randrange(annotation_dims.shape[0])
                           for i in range(num_clusters_)]
                centroids = annotation_dims[indices]
                anchors_str = self.kmeans(annotation_dims, centroids)
    else:
        anchors_str = None
        # kmeans returns None if no anchors are found; retry with new random indices.
        while anchors_str is None:
            indices = [random.randrange(annotation_dims.shape[0])
                       for i in range(num_clusters)]
            centroids = annotation_dims[indices]
            anchors_str = self.kmeans(annotation_dims, centroids)
    return anchors_str

def write(self, outfilepath=None, mode="w"):
    """Write objects in file to disk.
    Either you can write to a new file (if outfilepath is given),
    or you can append what's in storelist to the current filepath.
    """
    # If outfilepath is specified, iterate over every object in self
    # (which might come from something else) and every object in
    # self.storelist and write them out to disk
    if outfilepath:
        with open_file(outfilepath, mode) as outfile:
            logging.info("Writing to disk...")
            outfile.write(self.get_header())
            for obj in self.source:
                line = self.obj_to_str(obj)
                outfile.write(line)
            for obj in self.storelist:
                line = self.obj_to_str(obj)
                outfile.write(line)
        # Clear storelist now that it has been written to disk
        self.clear_storelist()
        # Update file attributes (in case of a new or converted file)
        self.source = self
        self.filepath = outfilepath
        self.is_new = False
    # If outfilepath is not specified, simply iterate over every
    # object in self.storelist and append them to the file on disk.
    else:
        # If the file is new and the path already exists, do not append
        if self.is_new and os.path.exists(self.filepath):
            raise CancerApiException(
                "Output file already exists: {}".format(self.filepath))
        with open_file(self.filepath, "a+") as outfile:
            logging.info("Writing to disk...")
            # If the file is new, start with the header
            if self.is_new:
                outfile.write(self.get_header())
            # If the file is new and source is not self (i.e., a converted file),
            # iterate over source
            if self.is_new and self.source is not self:
                for obj in self.source:
                    line = self.obj_to_str(obj)
                    outfile.write(line)
            # Proceed with iterating over storelist
            for obj in self.storelist:
                line = self.obj_to_str(obj)
                outfile.write(line)
        # Clear storelist now that it has been written to disk
        self.clear_storelist()
        # Update file attributes (in case of a new or converted file)
        self.source = self
        self.is_new = False

def check_mentions(self):
    """Checks mentions for sign-ups via email or Twitter, via "Sign up / Sign up [email]"."""
    try:
        mentions = self.api.mentions_timeline(count=3)
        for mention in mentions:
            if "stop" in mention.text.lower():
                # Unsubscribe an email address
                if len(mention.text.split()) == 3:
                    email = mention.text.split()[2]
                    email_list = utils.open_file(EMAILS).split()
                    if email in email_list:
                        email_list.remove(email)
                        utils.write_to_file(EMAILS, ' '.join(email_list))
                # Unsubscribe a Twitter handle
                else:
                    twitter_name = mention.user.screen_name
                    twitter_name_list = utils.open_file(TWITTER_NAMES).split()
                    if twitter_name in twitter_name_list:
                        twitter_name_list.remove(twitter_name)
                        utils.write_to_file(TWITTER_NAMES, ' '.join(twitter_name_list))
            elif "sign up" in mention.text.lower():
                # Email sign-up
                if len(mention.text.split()) > 3:
                    email = mention.text.split()[3]
                    email_list = utils.open_file(EMAILS).split()
                    if email not in email_list:
                        email_list.append(email)
                        utils.append_to_file(EMAILS, email)
                # Twitter handle sign-up
                else:
                    twitter_name = mention.user.screen_name
                    twitter_name_list = utils.open_file(TWITTER_NAMES).split()
                    if twitter_name not in twitter_name_list:
                        twitter_name_list.append(twitter_name)
                        utils.append_to_file(TWITTER_NAMES, twitter_name)
    except tweepy.TweepError as error:
        utils.write_to_log(f'Error checking mentions: {error}')

def main(): """Prints a diff between two files.""" parser = optparse.OptionParser( usage='usage: %prog file-a file-b [options]') parser.add_option( '--text-is-case-insensitive', action='store_false', dest='text_is_case_sensitive', default=True, help='<pfif:full_name>Jane</pfif:full_name> is the same as ' '<pfif:full_name>JANE</pfif:full_name>') parser.add_option( '--no-grouping', action='store_false', default=True, dest='group_by_record_id', help='Rather than grouping all differences pertaining to ' 'the same record together, every difference will be ' 'displayed individually.') parser.add_option( '--ignore-field', action='append', dest='ignore_fields', default=[], help='--ignore-field photo_url will mean that ' 'there will be no messages for photo_url fields that are ' 'added, removed, or changed. To specify multiple fields ' 'to ignore, use this flag multiple times.') parser.add_option( '--omit-blank-fields', action='store_true', default=False, help='Normally, a blank field (ie, <foo></foo>) will count ' 'as a different against a file that does not have that ' 'field at all. If you pass this flag, a blank field will ' 'count as an omitted field.') (options, args) = parser.parse_args() assert len(args) >= 2, 'Must provide two files to diff.' messages = pfif_file_diff( utils.open_file(args[0]), utils.open_file(args[1]), text_is_case_sensitive=options.text_is_case_sensitive, ignore_fields=options.ignore_fields, omit_blank_fields=options.omit_blank_fields) print utils.MessagesOutput.generate_message_summary(messages, is_html=False) if options.group_by_record_id: print utils.MessagesOutput.messages_to_str_by_id(messages) else: print utils.MessagesOutput.messages_to_str(messages)
def match(t_file, dm_file):
    """
    Open the files, convert the text to lists, clean them, and check for identical content.

    :param t_file: full path to file 1, as a string
    :param dm_file: full path to file 2, as a string
    :return: three sets: matching content, content from file 1 with no match,
             content from file 2 with no match
    """
    legacy = utils.open_file(t_file)
    ontology = utils.open_file(dm_file)
    legacy_occupations = utils.string_to_list(legacy)
    ontology_occupations = utils.string_to_list(ontology)
    cleaned_legacy = utils.clean_text(legacy_occupations)
    cleaned_ontology = utils.clean_text(ontology_occupations)
    return match_strings(cleaned_ontology, cleaned_legacy)

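# match_strings is not shown in this file; below is a minimal sketch of what it could
# look like, given that match() is documented to return three sets (matches, content
# from file 1 with no match, content from file 2 with no match). This is an assumption
# for illustration, not the project's actual implementation.
def match_strings(ontology_items, legacy_items):
    ontology_set = set(ontology_items)
    legacy_set = set(legacy_items)
    matched = ontology_set & legacy_set              # present in both files
    unmatched_legacy = legacy_set - ontology_set     # only in file 1 (legacy)
    unmatched_ontology = ontology_set - legacy_set   # only in file 2 (ontology)
    return matched, unmatched_legacy, unmatched_ontology
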
def main():
    model = 1
    file_name = 'train'
    n_classes = 100
    val_phrase = open_file('data/sign-to-gloss/cleaned/split-files/' + file_name
                           + '-phrase-' + str(n_classes))
    val_info = open_file('data/sign-to-gloss/cleaned/split-files/' + file_name
                         + '-info-' + str(n_classes))
    inp_lines, tar_lines, pred_lines = [], [], []
    for i in range(10, 11):
        inp, tar = create_batch([val_phrase[i]], val_info, n_classes)
        translate(inp, model)
        print(tar)
        """print('Input sentence: ', preprocess_inp_tar(inp))

def OwnData(folder):
    img = open_file(folder + 'OwnData.mat')['Data']
    gt = open_file(folder + 'OwnData_gt.mat')['temp']
    # gt = gt.astype('uint8')
    rgb_bands = (47, 31, 15)  # not adjusted yet
    label_values = ["Background", "Class_1", "Class_2"]
    ignored_labels = [0]
    palette = None  # Added so the caller does not raise an error.
    return img, gt, rgb_bands, ignored_labels, label_values, palette

def _calculate_total_lengths(self):
    msg = "Calculating enrichment in regions"
    if self.counts_file:
        self.sorted_region_path = self.counts_file
        if (not self.total_reads_a or not self.total_reads_b
                or (not self.total_reads_replica and self.use_replica)) and not self.use_MA:
            self.logger.info("... counting from counts file...")
            self.total_reads_a = 0
            self.total_reads_b = 0
            if self.total_reads_replica:
                self.total_reads_replica = 0
            else:
                self.total_reads_replica = 1
            for line in open(self.counts_file):
                try:
                    enrich = dict(zip(enrichment_keys, line.split()))
                    self.total_reads_a += float(enrich["signal_a"])
                    self.total_reads_b += float(enrich["signal_b"])
                    if self.use_replica:
                        self.total_reads_replica += float(enrich["signal_prime_2"])
                except ValueError:
                    self.logger.debug("(Counting) skip header...")
    else:
        self.logger.info("... counting number of lines in files...")
        if not self.total_reads_a:
            if self.experiment_format == BAM:
                self.total_reads_a = bam.size(self.current_experiment_path)
            else:
                self.total_reads_a = sum(
                    1 for line in utils.open_file(self.current_experiment_path,
                                                  self.experiment_format,
                                                  logger=self.logger))
        if not self.total_reads_b:
            if self.experiment_format == BAM:
                self.total_reads_b = bam.size(self.current_control_path)
            else:
                self.total_reads_b = sum(
                    1 for line in utils.open_file(self.current_control_path,
                                                  self.control_format,
                                                  logger=self.logger))
        if self.use_replica and not self.total_reads_replica:
            if self.experiment_format == BAM:
                self.total_reads_replica = bam.size(self.replica_path)
            else:
                self.total_reads_replica = sum(
                    1 for line in utils.open_file(self.replica_path,
                                                  self.experiment_format,
                                                  logger=self.logger))
    self.logger.debug("Number lines in experiment A: %s Experiment B: %s"
                      % (self.total_reads_a, self.total_reads_b))
    if self.use_replica:
        msg = "%s using replicas..." % msg
    else:
        msg = "%s using swap..." % msg
    self.logger.info(msg)
    self.average_total_reads = (self.total_reads_a + self.total_reads_b) / 2

def delete_question_by_id(question_id):
    question_lines = []
    questions = utils.open_file(questions_data, QUESTION_HEADER)
    for row in questions:
        if row['id'] != question_id:
            question_lines.append(row)
    utils.write_to_file(questions_data, question_lines, QUESTION_HEADER)

    answer_lines = []
    answers = utils.open_file(answers_data, ANSWER_HEADER)
    for row in answers:
        if row['question_id'] != question_id:
            answer_lines.append(row)
    utils.write_to_file(answers_data, answer_lines, ANSWER_HEADER)

def _create_examples(self):
    """Given a file with a feature bundle for each term and a list of positive
    and negative seeds, create a file with the vectors for the known positive
    and negative examples. Also create the model from the vectors."""
    print('Reading feature vectors and extracting pos and neg examples...')
    with open_file(self.features_file) as feats, \
            open_file(self.vectors_file, 'w') as vectors:
        for line in feats:
            try:
                term = line.split('\t')[2]
                label = self._get_label(term)
                if label in ('y', 'n'):
                    vectors.write("%s\t%s" % (label, line))
            except Exception as e:
                print('ERROR:', e)

def include(self, in_relpath, context):
    # print("including: " + in_relpath)
    output_str = ""
    context["blog"] = self.blog

    def include2(relpath):
        return self.include(relpath, context)

    context["include"] = include2
    fd = utils.open_file(self.in_base_path + "/" + in_relpath, "rb")
    lines = collections.deque(fd.readlines())
    while len(lines) > 0:
        line = lines.popleft()
        # XXX: this is not recursive
        if re.search("#{for_each_post}", line):
            template = ""
            while len(lines) > 0:
                line = lines.popleft()
                if re.search("#{done}", line):
                    for p in self.posts:
                        context["post"] = p
                        output_str += self.replace_commands(template, context)
                    del context["post"]
                    break
                template += line
        else:
            output_str += self.replace_commands(line, context)
    # print("include returned '" + output_str + "'")
    return output_str

def edit_note(note, upload, session, trainee=False):
    # Write the current data to a temporary file
    (fd, temp_filename) = utils.temp_filename()
    editor = os.environ.get("EDITOR", "vi")
    answer = 'E'
    while answer == 'E':
        os.system("%s %s" % (editor, temp_filename))
        temp_file = utils.open_file(temp_filename)
        newnote = temp_file.read().rstrip()
        temp_file.close()
        print "New Note:"
        print utils.prefix_multi_line_string(newnote, " ")
        prompt = "[D]one, Edit, Abandon, Quit ?"
        answer = "XXX"
        while prompt.find(answer) == -1:
            answer = utils.our_raw_input(prompt)
            m = re_default_answer.search(prompt)
            if answer == "":
                answer = m.group(1)
            answer = answer[:1].upper()
    os.unlink(temp_filename)

    if answer == 'A':
        return
    elif answer == 'Q':
        return 0

    comment = NewComment()
    comment.policy_queue = upload.policy_queue
    comment.package = upload.changes.source
    comment.version = upload.changes.version
    comment.comment = newnote
    comment.author = utils.whoami()
    comment.trainee = trainee
    session.add(comment)
    session.commit()

def get_all_quetions_by_latest(order=False, by='submission_time'):
    data = []
    questions = utils.open_file(questions_data, QUESTION_HEADER)
    for row in questions:
        data.append(row)
    data = sorted(data, key=lambda i: i[by], reverse=order)
    return data

def __init__(self, *args, **kwargs):
    self.__dict__ = self.__shared_state

    if not getattr(self, 'initialised', False):
        self.initialised = True
        self.timestamp = time.strftime("%Y%m%d%H%M%S")
        cnf = Config()
        if cnf.has_key("Dir::UrgencyLog"):
            # Create the log directory if it doesn't exist
            self.log_dir = cnf["Dir::UrgencyLog"]
            if not os.path.exists(self.log_dir) or not os.access(self.log_dir, os.W_OK):
                warn("UrgencyLog directory %s does not exist or is not writeable, "
                     "using /srv/ftp.debian.org/tmp/ instead" % (self.log_dir))
                self.log_dir = '/srv/ftp.debian.org/tmp/'
            # Open the logfile
            self.log_filename = "%s/.install-urgencies-%s.new" % (self.log_dir, self.timestamp)
            self.log_file = open_file(self.log_filename, 'w')
        else:
            self.log_dir = None
            self.log_filename = None
            self.log_file = None
        self.writes = 0

def prod_maintainer(notes, upload):
    cnf = Config()
    changes = upload.changes
    whitelists = [upload.target_suite.mail_whitelist]

    # Here we prepare an editor and get them ready to prod...
    (fd, temp_filename) = utils.temp_filename()
    temp_file = os.fdopen(fd, 'w')
    temp_file.write("\n\n=====\n\n".join([note.comment for note in notes]))
    temp_file.close()
    editor = os.environ.get("EDITOR", "vi")
    answer = 'E'
    while answer == 'E':
        os.system("%s %s" % (editor, temp_filename))
        temp_fh = utils.open_file(temp_filename)
        prod_message = "".join(temp_fh.readlines())
        temp_fh.close()
        print "Prod message:"
        print utils.prefix_multi_line_string(prod_message, " ", include_blank_lines=1)
        prompt = "[P]rod, Edit, Abandon, Quit ?"
        answer = "XXX"
        while prompt.find(answer) == -1:
            answer = utils.our_raw_input(prompt)
            m = re_default_answer.search(prompt)
            if answer == "":
                answer = m.group(1)
            answer = answer[:1].upper()
    os.unlink(temp_filename)

    if answer == 'A':
        return
    elif answer == 'Q':
        return 0

    # Otherwise, do the proding...
    user_email_address = utils.whoami() + " <%s>" % (cnf["Dinstall::MyAdminAddress"])

    changed_by = changes.changedby or changes.maintainer
    maintainer = changes.maintainer
    maintainer_to = utils.mail_addresses_for_upload(maintainer, changed_by, changes.fingerprint)

    Subst = {
        '__SOURCE__': upload.changes.source,
        '__CHANGES_FILENAME__': upload.changes.changesname,
        '__MAINTAINER_TO__': ", ".join(maintainer_to),
    }
    Subst["__FROM_ADDRESS__"] = user_email_address
    Subst["__PROD_MESSAGE__"] = prod_message
    Subst["__CC__"] = "Cc: " + cnf["Dinstall::MyEmailAddress"]

    prod_mail_message = utils.TemplateSubst(Subst, cnf["Dir::Templates"] + "/process-new.prod")

    # Send the prod mail
    utils.send_mail(prod_mail_message, whitelists=whitelists)

    print "Sent prodding message"

def main(): """Runs all validations on the provided PFIF XML file""" assert len(sys.argv) == 2, 'Usage: python pfif_validator.py my-pyif-xml-file' validator = PfifValidator(utils.open_file(sys.argv[1], 'r')) messages = validator.run_validations() print(utils.MessagesOutput.generate_message_summary(messages)) print(validator.validator_messages_to_str(messages))
def addSequence(self, fastafile):
    '''Add sequences in Fasta format to the genome browser.

    Arguments:
        fastafile -- a string or iterable representing the input Fasta file(s)
                     to be added to the genome browser.
    '''
    directory = self.__directory__
    try:
        os.mkdir(os.path.join(directory, 'sequences'))
    except:
        pass
    if isinstance(fastafile, str):
        fastafile = (fastafile, )
    for i in fastafile:
        con = open_file(i)
        seq = ''
        for line in con:
            try:
                line = line.decode('utf-8')
            except:
                pass
            line = line.rstrip()
            if line.startswith('>'):
                if not isinstance(seq, str):
                    seq.close()
                chrom = re.split(' |\|', re.sub('^>', '', line))[0]
                seq = open(os.path.join(directory, 'sequences', chrom + '.fa'), 'w')
                seq.write(">" + chrom + "\n")
            else:
                seq.write(line)
        seq.close()
        con.close()

def __init__(self, *args, **kwargs):
    self.__dict__ = self.__shared_state

    if not getattr(self, 'initialised', False):
        self.initialised = True
        self.timestamp = time.strftime("%Y%m%d%H%M%S")
        cnf = Config()
        if "Dir::UrgencyLog" in cnf:
            # Create the log directory if it doesn't exist
            self.log_dir = cnf["Dir::UrgencyLog"]
            if not os.path.exists(self.log_dir) or not os.access(self.log_dir, os.W_OK):
                warn("UrgencyLog directory %s does not exist or is not writeable, "
                     "using /srv/ftp.debian.org/tmp/ instead" % (self.log_dir))
                self.log_dir = '/srv/ftp.debian.org/tmp/'
            # Open the logfile
            self.log_filename = "%s/.install-urgencies-%s.new" % (self.log_dir, self.timestamp)
            self.log_file = open_file(self.log_filename, 'w')
        else:
            self.log_dir = None
            self.log_filename = None
            self.log_file = None
        self.writes = 0

def create_train_test_valid_data(src_path, tgt_path, base_path):
    x = open_file(src_path)
    y = open_file(tgt_path)
    src_train, src, tgt_train, tgt = train_test_split(x, y, test_size=0.2, shuffle=True)
    src_valid, src_test, tgt_valid, tgt_test = train_test_split(src, tgt, test_size=0.5, shuffle=True)
    NMTDataset.save_file(src_train, base_path + "src_train.txt")
    NMTDataset.save_file(tgt_train, base_path + "tgt_train.txt")
    NMTDataset.save_file(src_valid, base_path + "src_valid.txt")
    NMTDataset.save_file(tgt_valid, base_path + "tgt_valid.txt")
    NMTDataset.save_file(src_test, base_path + "src_test.txt")
    NMTDataset.save_file(tgt_test, base_path + "tgt_test.txt")

def infer(self, model_path, data_path, output):
    test_reader = paddle.batch(
        paddle.reader.buffered(
            reader.create_reader(data_path, self.settings),
            size=self.conf.batch_size * 1000),
        batch_size=self.conf.batch_size)

    # load the trained model
    parameters = paddle.parameters.Parameters.from_tar(
        utils.open_file(model_path, "r"))
    inferer = paddle.inference.Inference(
        output_layer=self.tags_layer, parameters=parameters)

    def count_evi_ids(test_batch):
        num = 0
        for sample in test_batch:
            num += len(sample[reader.E_IDS])
        return num

    for test_batch in test_reader():
        tags = inferer.infer(input=test_batch, field=["id"], feeding=network.feeding)
        evi_ids_num = count_evi_ids(test_batch)
        assert len(tags) == evi_ids_num
        print >> output, ";\n".join(str(tag) for tag in tags) + ";"

def right_click_call(self, _):
    """Right-click handler for the action button."""
    fp = self.txt['text']
    if not fp:
        utils.showinfo('You have not selected a file/directory yet')
        return
    if not os.path.exists(fp):
        utils.showinfo('The file/directory "{}" does not exist'.format(fp))
        return
    if self.is_folder:
        utils.open_dir(fp)
    else:
        utils.open_file(fp, True)

def get_taxonomy():
    a_file = open_file("resources/occupations_from_legacy_taxonomy.txt")
    strings = string_to_list(a_file)
    cleaned_strings = clean_text(strings)
    split_strings = map(split_slash, cleaned_strings)
    flatten_list = flatten(split_strings)
    return flatten_list

def filter(alias2ent, freq_chat, th_chat, freq_general, th_general,
           freq_lex, th_lex, filter_set, complex_filter_set, exceptions):
    entities = []
    for filename in filter_set:
        t_entities = load_from_file(filename, full=True)
        entities.extend(t_entities)
    entities = set(entities)

    complex_entities = []
    for filename in complex_filter_set:
        t_entities = load_from_file(filename, full=True)
        complex_entities.extend(t_entities)
    complex_entities = set(complex_entities)

    keys = alias2ent.keys()
    exceptions = set(exceptions)
    fout = utils.open_file('filter.txt', 'w')
    for alias in keys:
        if alias in exceptions:
            continue
        # if len(alias) <= 1 or alias in entities:
        if len(alias) <= 1 or alias in entities or decouple(complex_entities, alias, True):
            alias2ent[alias] = []
            continue
        if freq_chat[alias] > th_chat or freq_general[alias] > th_general or freq_lex[alias] > th_lex:
            alias2ent[alias] = []
            fout.write(u"{}\t{}\t{}\t{}\n".format(alias, freq_chat[alias],
                                                  freq_general[alias], freq_lex[alias]))
    fout.close()

def generate(self, in_base_path, out_base_path):
    self.in_base_path = in_base_path
    self.out_base_path = out_base_path

    utils.makedirs(out_base_path)
    imgutils.init(in_base_path)
    utils.init(in_base_path)

    self.blog = Struct(json.load(utils.open_file(self.in_base_path + "/blog.json")))

    # copy static content
    cmd = "cp -rf " + in_base_path + "/static/* " + out_base_path
    print("copy static content: " + cmd)
    proc = utils.execute_shell(cmd)

    # 'dynamic' content
    for c in ["sticky", "posts"]:
        setattr(self, c, [])
        self.generate_content(c)

    # home page
    self.generate_home()

    # feed
    self.generate_feed()

def isValid(text):
    """
    A valid input should be in the form of:
        weather <city>
    """
    text = utils.escape_query(text)
    content = utils.open_file(os.path.join(os.getcwd(), "glaucobot/assets/salutations.txt"))
    for line in content:
        if bool(re.search(r'%s' % (line.lower()), text, re.IGNORECASE)):
            return True
    return False

def file_summary(self):
    # changes["distribution"] may not exist in corner cases
    # (e.g. unreadable changes files)
    if not self.changes.has_key("distribution") or not \
            isinstance(self.changes["distribution"], dict):
        self.changes["distribution"] = {}

    byhand = False
    new = False
    summary = ""
    override_summary = ""

    for name, entry in sorted(self.files.items()):
        if entry.has_key("byhand"):
            byhand = True
            summary += name + " byhand\n"
        elif entry.has_key("new"):
            new = True
            summary += "(new) %s %s %s\n" % (name, entry["priority"], entry["section"])
            if entry.has_key("othercomponents"):
                summary += "WARNING: Already present in %s distribution.\n" % (entry["othercomponents"])
            if entry["type"] == "deb":
                deb_fh = open_file(name)
                summary += TagSection(deb_extract_control(deb_fh))["Description"] + '\n'
                deb_fh.close()
        else:
            entry["pool name"] = poolify(self.changes.get("source", ""), entry["component"])
            destination = entry["pool name"] + name
            summary += name + "\n to " + destination + "\n"
            if not entry.has_key("type"):
                entry["type"] = "unknown"
            if entry["type"] in ["deb", "udeb", "dsc"]:
                # (queue/unchecked), there we have override entries already, use them
                # (process-new), there we dont have override entries, use the newly generated ones.
                override_prio = entry.get("override priority", entry["priority"])
                override_sect = entry.get("override section", entry["section"])
                override_summary += "%s - %s %s\n" % (name, override_prio, override_sect)

    return (byhand, new, summary, override_summary)

def new_site(root=u'.', force=False):
    try:
        os.stat(os.path.join(root, 'index.html'))
        if not force:
            msg = "Oops, there's already an index.html file in the source \n" + \
                  "folder. If you want to overwrite this folder with a new \n" + \
                  "site, use the --force option."
            print(msg)
            sys.exit(1)
    except OSError:
        pass
    print("Creating new site in '{0}'.".format(root))
    for fname, text in NEW_SITE.items():
        fpath = os.path.join(root, fname)
        with utils.open_file(fpath, "w", create_dir=True) as afile:
            afile.write(text)

def _open_log(self, debug):
    # Create the log directory if it doesn't exist
    from daklib.config import Config
    logdir = Config()["Dir::Log"]
    if not os.path.exists(logdir):
        umask = os.umask(0o0000)
        os.makedirs(logdir, 0o2775)
        os.umask(umask)

    # Open the logfile
    logfilename = "%s/%s" % (logdir, time.strftime("%Y-%m"))
    logfile = None

    if debug:
        logfile = sys.stderr
    else:
        umask = os.umask(0o0002)
        logfile = utils.open_file(logfilename, 'a')
        os.umask(umask)

    self.logfile = logfile

def processHeader(self):
    fd = utils.open_file(self.page_path, "rb")

    header_json = ""
    m = ""
    in_header = True
    for line in fd:
        if in_header:
            if line == "\n":
                in_header = False
            header_json += line
        else:
            m += line

    try:
        self.header = json.loads(header_json)
    except ValueError as e:
        utils.fatal("malformed content header in " + self.page_path + ":\n" + str(e))

    for k in self.header:
        setattr(self, k, self.header[k])

    self.html = markdown.markdown(m)

def open_tag_file(self):
    tag_file = "{}.tag.txt".format(self.acquisition_path.encode("utf-8"))
    open_file(tag_file)

def process_file(self, in_relpath, out_relfpath, context):
    print("=== " + out_relfpath + " ===")
    out_fpath = self.out_base_path + "/" + out_relfpath
    utils.makedirs(os.path.dirname(out_fpath))
    utils.open_file(out_fpath, "wb").write(self.include(in_relpath, context))

def open_rr_file(self):
    rr_file = "{}.rr.txt".format(self.acquisition_path.encode("utf-8"))
    open_file(rr_file)

def _open(self):
    """Use the open_file function on self.source.filepath in 'r' mode."""
    return open_file(self.source.filepath)

def mc_study(self):
    """
    Perform pseudo-experiments by generating random data and models
    (using the statistical bin uncertainty).

    Pseudo data:
        - sum all model hists, then randomise
    Model:
        - randomise each input hist individually
    """
    samples = self.samples
    ntrials = 10000
    if self.plot_toy_fits:
        ntrials = 10

    ## save options before mc study
    tag = self.tag
    quiet = self.quiet
    self.quiet = True

    toy_arrays = {}
    for s in samples:
        if not s in toy_arrays:
            toy_arrays[s] = {}
        toy_arrays[s]['mean'] = []
        toy_arrays[s]['error'] = []
        toy_arrays[s]['pull'] = []
        toy_arrays[s]['diff'] = []
        toy_arrays[s]['mc'] = []
    data_array = []

    ## initialise toy fitter
    ## VERY important to prepare hists first
    ## before initialising, so TFractionFitter
    ## is not initialised with the real data
    self.prepare_toy_hists()
    self.init_fitter(toy=True)

    for i in xrange(ntrials):
        if i % 100 == 0:
            print 'trial ', i
        #print 'trial%d summary: ' % i
        self.tag = '%s_trial%d' % (tag, i)
        self.randomise_hists()
        self.toy_fit()
        if not self.fit_status == 0:
            continue
        if self.plot_toy_fits:
            self.plot()

        temp_means = {}
        temp_errors = {}
        temp_pulls = {}
        temp_diffs = {}
        temp_mcs = {}
        has_zero = False
        n_tot_mc = self.ntot_orig()
        #print 'n_tot_mc: %.1f, h_orig_tot.int: %.1f' % (n_tot_mc, self.h_orig_total.Integral())
        n_tot_fit = self.ntot_fit()
        #print 'n_tot_fit: %.1f, n_data: %.1f' % (n_tot_fit, self.ndata_curr())
        for s in samples:
            n_mc = self.nsamp_orig(s)
            n_fit = self.nsamp_fit(s)
            en_fit = self.ensamp_fit(s)
            pull = (n_fit - n_mc) / en_fit if en_fit else 0.0
            diff = n_fit - n_mc
            temp_means[s] = n_fit
            temp_errors[s] = en_fit
            temp_pulls[s] = pull
            temp_diffs[s] = diff
            temp_mcs[s] = self.nsamp_curr(s)
            f_fit = self.fsamp_fit(s)
            #if f_fit == 0.: has_zero = True
            if f_fit < 0.0000001:
                has_zero = True
            f_mc = n_mc / n_tot_mc if n_tot_mc else 0.0
            f_fit = n_fit / n_tot_fit if n_tot_fit else 0.0
            #print '%s, mc: %.1f, fit: %.1f, fmc: %.4f, ffit: %.4f' % (s, n_mc, n_fit, f_mc, f_fit)
        #print 'data, mc: %.1f, fit: %.1f' % (n_tot_mc, n_tot_fit)

        ## remove cases where any component is fit to 0:
        ## the argument is that we would not take this result
        ## if we got it in data. Probably should try to do
        ## something better in future.
        if not has_zero:
            for s in samples:
                toy_arrays[s]['mean'].append(temp_means[s])
                toy_arrays[s]['error'].append(temp_errors[s])
                toy_arrays[s]['pull'].append(temp_pulls[s])
                toy_arrays[s]['diff'].append(temp_diffs[s])
                toy_arrays[s]['mc'].append(temp_mcs[s])
            data_array.append(self.ndata_curr())
        else:
            print 'ERROR - component fit to zero'

    ## restore to original state before toys
    self.reset_hists()
    self.tag = tag
    self.quiet = quiet

    ## set corrections from toy study
    filename = 'toy_%s.root' % (self.tag)
    for s in samples:
        a_mean = toy_arrays[s]['mean']
        a_error = toy_arrays[s]['error']
        a_pull = toy_arrays[s]['pull']
        a_diff = toy_arrays[s]['diff']
        a_mc = toy_arrays[s]['mc']

        if not s in self.toy_results:
            self.toy_results[s] = {}
        self.toy_results[s]['meanm'] = numpy.mean(a_mean)
        self.toy_results[s]['meane'] = numpy.std(a_mean)
        self.toy_results[s]['errorm'] = numpy.mean(a_error)
        self.toy_results[s]['errore'] = numpy.std(a_error)
        self.toy_results[s]['pullm'] = numpy.mean(a_pull)
        self.toy_results[s]['pulle'] = numpy.std(a_pull)

        ## create plots
        h_mean = create_mean_hist(s)
        h_error = create_error_hist(s)
        h_pull = create_pull_hist(s)
        h_diff = create_diff_hist(s)
        h_mc = create_mc_hist(s)
        for v in a_mean:
            h_mean.Fill(v)
        for v in a_error:
            h_error.Fill(v)
        for v in a_pull:
            h_pull.Fill(v)
        for v in a_diff:
            h_diff.Fill(v)
        for v in a_mc:
            h_mc.Fill(v)
        utils.save_object(h_mean, filename)
        utils.save_object(h_error, filename)
        utils.save_object(h_pull, filename)
        utils.save_object(h_diff, filename)
        utils.save_object(h_mc, filename)

    h_mc_data = create_mc_hist('data')
    for v in data_array:
        h_mc_data.Fill(v)
    utils.save_object(h_mc_data, filename)

    for isamp in xrange(len(samples)):
        s1 = samples[isamp]
        for isamp2 in xrange(len(samples)):
            if not isamp2 < isamp:
                continue
            s2 = samples[isamp2]
            h = create_2d_mean_hist(s1, s2)
            for ns1, ns2 in zip(toy_arrays[s1]['mean'], toy_arrays[s2]['mean']):
                h.Fill(ns1, ns2)
            utils.save_object(h, filename)

    for s in samples:
        h = create_2d_mean_hist('%s_mc' % s, '%s_fit' % s)
        for ns1, ns2 in zip(toy_arrays[s]['mc'], toy_arrays[s]['mean']):
            h.Fill(ns1, ns2)
        utils.save_object(h, filename)

    f = utils.open_file(filename)
    f.Close()

def view(self):
    utils.open_file(self.localfile)
    self.watched = True

def handle(text):
    content = utils.open_file(os.path.join(os.getcwd(), "glaucobot/assets/affirmatives.txt"))
    return random.choice(content)
