def write_info(info_file, mallet_files, t1):
    """Write a short run summary to info_file.

    The summary holds the command line (from sys.argv), a timestamp, the
    number of whole seconds elapsed since t1, the value returned by
    get_git_commit(), and one "source = ..." line per entry in mallet_files.

    Arguments:
        info_file: path of the file to create (overwritten if it exists).
        mallet_files: iterable of values written as the "source" lines.
        t1: start time; it is subtracted from time.time(), so it should be
            a value obtained from time.time().
    """
    # The context manager closes the handle even when a write fails; the
    # earlier version never closed it.
    with open(info_file, 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("timestamp = %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        fh.write("seconds elapsed = %d\n" % int(time.time() - t1))
        fh.write("git_commit = %s\n\n" % get_git_commit())
        for f in mallet_files:
            fh.write("source = %s\n" % f)
def _write_info(source_file, out_file1, out_file2, t1, t2):
    """Write a '<name>.info' file next to each of the two output files.

    Each info file records the command line, the absolute paths of
    source_file and of the output file it belongs to, a timestamp, the
    whole seconds between t1 and t2, and the value of get_git_commit().

    Arguments:
        source_file: path of the input file that was processed.
        out_file1, out_file2: paths of the two output files; '.info' is
            appended to each to form the info file name.
        t1, t2: start and end times; only their difference is used.
    """
    for fname in (out_file1, out_file2):
        # One context manager per file so that each handle is closed before
        # the next is opened; the earlier version left both open.
        with open(fname + '.info', 'w') as fh:
            fh.write("$ python %s\n\n" % ' '.join(sys.argv))
            fh.write("source file = %s\n" % os.path.abspath(source_file))
            fh.write("target file = %s\n" % os.path.abspath(fname))
            fh.write("timestamp = %s\n" % time.strftime("%Y%m%d:%H%M%S"))
            fh.write("seconds elapsed = %d\n" % int(t2 - t1))
            # No trailing newline here; kept so the file contents stay the
            # same as before.
            fh.write("git_commit = %s" % get_git_commit())
def _write_info(source_file, target_file, feats_file, feats, t1):
    """Write '<target_file>.info' describing a feature-based processing run.

    The file records the command line, the absolute source and target
    paths, the features file, the sorted feature names, a timestamp, the
    processing time since t1 and the value of get_git_commit().

    Arguments:
        source_file: path of the input file.
        target_file: path of the output file; '.info' is appended to it.
        feats_file: written as given on the "features file" line.
        feats: mapping whose keys are written, sorted and space-separated.
        t1: start time; it is subtracted from time.time().
    """
    # Use a context manager so the handle is closed on success and on error;
    # the earlier version never closed it.
    with open(target_file + '.info', 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("source file = %s\n" % os.path.abspath(source_file))
        fh.write("target file = %s\n" % os.path.abspath(target_file))
        fh.write("features file = %s\n" % feats_file)
        fh.write("features = %s\n" % ' '.join(sorted(feats.keys())))
        fh.write("timestamp = %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        # %d truncates the float difference to whole seconds.
        fh.write("processing time = %ds\n" % (time.time() - t1))
        # No trailing newline here; kept so the file contents stay the same.
        fh.write("git_commit = %s" % get_git_commit())
def _itrainer_create_info_file(corpus, model, filelist, features, annotation):
    """Record the settings of a training run inside the model directory.

    Writes 'itrain.info.general' in the model directory with the command
    line, the arguments as given and the value of get_git_commit(). It then
    copies the annotation file and the file list into the same directory as
    'itrain.info.annotations' and 'itrain.info.files'.
    """
    general_info = os.path.join(model, 'itrain.info.general')
    with open(general_info, 'w') as out:
        emit = out.write
        emit("$ python %s\n\n" % ' '.join(sys.argv))
        emit("corpus = %s\n" % corpus)
        emit("file_list = %s\n" % filelist)
        emit("model = %s\n" % model)
        emit("features = %s\n" % features)
        # NOTE(review): the key is spelled "anotation" in the emitted file;
        # kept unchanged so existing output stays the same.
        emit("anotation = %s\n" % annotation)
        emit("git_commit = %s\n" % get_git_commit())
    # Keep copies of the inputs next to the general info file.
    copies = ((annotation, 'itrain.info.annotations'),
              (filelist, 'itrain.info.files'))
    for origin, copy_name in copies:
        shutil.copyfile(origin, os.path.join(model, copy_name))
def create_info_files(corpus, model, filelist, classification):
    """Record the settings of a classification run in its output directory.

    Writes 'iclassify.info.general' in the classification directory with
    the command line, the arguments as given and the value of
    get_git_commit(). It then copies the file list into the same directory
    as 'iclassify.info.files'.
    """
    general_info = os.path.join(classification, 'iclassify.info.general')
    with open(general_info, 'w') as out:
        emit = out.write
        emit("$ python %s\n\n" % ' '.join(sys.argv))
        emit("corpus = %s\n" % corpus)
        emit("file_list = %s\n" % filelist)
        emit("model = %s\n" % model)
        emit("classification = %s\n" % classification)
        emit("git_commit = %s\n" % get_git_commit())
    # Keep a copy of the file list next to the general info file.
    files_copy = os.path.join(classification, 'iclassify.info.files')
    shutil.copyfile(filelist, files_copy)
def add_info_file(corpus_dir, extra_files, added):
    """Append a record of this run to the additions info file of a corpus.

    The file lives in CORPUS/config and its name is taken from
    corpus.FNAME_INFO_ADDITIONS. The appended record holds the command
    line, a timestamp, the extra_files and added values as given, and the
    value of get_git_commit(), followed by blank lines as a separator.

    The file is passed to make_writable() before it is opened and to
    read_only() after it has been closed.
    """
    info_file = os.path.join(corpus_dir, 'config', corpus.FNAME_INFO_ADDITIONS)
    make_writable(info_file)
    # The context manager guarantees the handle is closed even if a write or
    # get_git_commit() raises; the manual close() was skipped in that case.
    with open(info_file, 'a') as fh:
        fh.write("$ %s\n\n" % ' '.join(sys.argv))
        fh.write("timestamp = %s\n" % time.strftime("%x %X"))
        fh.write("file_list = %s\n" % extra_files)
        fh.write("files_added = %s\n" % added)
        fh.write("git_commit = %s\n\n\n" % get_git_commit())
    read_only(info_file)
def _write_info(source_file, target_file, info_string, threshold, t1, t2):
    """Write an '.info' file describing a thresholded processing run.

    A trailing '.gz' on target_file is dropped first, so both the info file
    name and the reported target path use the name without that suffix.
    The file records the command line, the absolute source and target
    paths, the threshold, a timestamp, the whole seconds between t1 and t2,
    the value of get_git_commit() and finally info_string.

    Arguments:
        source_file: path of the input file.
        target_file: path of the output file, optionally ending in '.gz'.
        info_string: free text appended at the end, after a blank line.
        threshold: number formatted with %d.
        t1, t2: start and end times; only their difference is used.
    """
    if target_file.endswith('.gz'):
        target_file = target_file[:-3]
    # Use a context manager so the handle is closed on success and on error;
    # the earlier version never closed it.
    with open(target_file + '.info', 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("source file = %s\n" % os.path.abspath(source_file))
        fh.write("target file = %s\n" % os.path.abspath(target_file))
        fh.write("threshold = %d\n" % threshold)
        fh.write("timestamp = %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        fh.write("seconds elapsed = %d\n" % int(t2 - t1))
        fh.write("git_commit = %s" % get_git_commit())
        fh.write("\n\n" + info_string + "\n")
def _generate_settings(self):
    """Build the command line string and the list of settings lines.

    Sets self.command to the "$ python ..." line built from sys.argv and
    self.settings to a list of "key = value" lines: a timestamp, several
    attributes of this object, and the value of get_git_commit().
    """
    self.command = "$ python %s\n\n" % ' '.join(sys.argv)
    # Each entry pairs a line template with the value to fill in; the values
    # are evaluated in the order in which they are listed.
    entries = (
        ("timestamp = %s\n", time.strftime("%x %X")),
        ("language = %s\n", self.language),
        ("datasource = %s\n", self.datasource),
        ("source_file = %s\n", self.source_file),
        ("source_path = %s\n", self.source_path),
        ("target_path = %s\n", self.location),
        ("shuffle = %s\n", str(self.shuffle_file)),
        ("git_commit = %s\n", get_git_commit()),
    )
    self.settings = [template % value for template, value in entries]
def _create_info_general_file(self):
    """Write the general info file for this run to self.info_file_general.

    The file records the command line, absolute paths of the model, corpus,
    file list, annotation file and pipeline configuration file, the feature
    names returned by get_features(), the annotation count, a timestamp and
    the value of get_git_commit().
    """
    with open(self.info_file_general, 'w') as out:
        emit = out.write
        full_path = os.path.abspath
        emit("$ python %s\n\n" % ' '.join(sys.argv))
        emit("model = %s\n" % full_path(self.model))
        emit("corpus = %s\n" % full_path(self.corpus))
        emit("features = %s\n" % ' '.join(get_features()))
        emit("file_list = %s\n" % full_path(self.file_list))
        emit("annotation_file = %s\n" % full_path(self.annotation_file))
        emit("annotation_count = %s\n" % self.annotation_count)
        emit("config_file = %s\n" % full_path(rconfig.pipeline_config_file))
        emit("timestamp = %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        emit("git_commit = %s\n" % get_git_commit())
def _write_info(mallet_file, model_file, mtrainer, out_file, stderr_file, t1):
    """Write '<model_file>.info' describing a model training run.

    The file records the command line, the absolute mallet and model file
    paths, the trainer settings, a timestamp, the time elapsed since t1 and
    the value of get_git_commit(). It then adds the commands saved on
    mtrainer, the contents of out_file and stderr_file (each line prefixed
    with a space), and finally the saved cinfo commands.

    Arguments:
        mallet_file: path of the mallet input file.
        model_file: path of the model file; '.info' is appended to it.
        mtrainer: object providing settings(), saved_create_vectors_command,
            saved_create_model_command and saved_create_cinfo_commands.
        out_file: path of the file copied under "Contents of .out file".
        stderr_file: path of the file copied under "Contents of .stderr
            file".
        t1: start time; it is subtracted from time.time().
    """
    # All three files are handled by context managers so that they are
    # closed on success and on error; the earlier version closed none.
    with open(model_file + '.info', 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("mallet file = %s\n" % os.path.abspath(mallet_file))
        fh.write("model file = %s\n" % os.path.abspath(model_file))
        fh.write("trainer settings = %s\n" % mtrainer.settings())
        fh.write("timestamp = %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        # %d truncates the float difference to whole seconds.
        fh.write("time elapsed = %ds\n" % (time.time() - t1))
        fh.write("git_commit = %s\n\n" % get_git_commit())
        fh.write("$ %s\n\n" % mtrainer.saved_create_vectors_command)
        fh.write("$ %s\n" % mtrainer.saved_create_model_command)
        fh.write("\nContents of .out file:\n\n")
        with open(out_file) as out_fh:
            for line in out_fh:
                fh.write(" %s" % line)
        fh.write("\nContents of .stderr file:\n\n")
        with open(stderr_file) as err_fh:
            for line in err_fh:
                # Form feeds and carriage returns inside a line become line
                # breaks followed by the same one-space prefix.
                line = line.replace("\f", "\n ")
                line = line.replace("\r", "\n ")
                fh.write(" %s" % line)
        for cmd in mtrainer.saved_create_cinfo_commands:
            fh.write("\n$ %s\n" % cmd)
print fname fh_terms = open_input_file(fname) count = 0 for line in fh_terms: count += 1 if count > 100000: break if count % 500000 == 0: print ' ', count fields = line.split("\t") term = fields[0] term_count = int(fields[2]) terms[term] = terms.get(term, 0) + term_count return terms if __name__ == '__main__': target_dir = sys.argv[1] result_files = [] for exp in sys.argv[2:]: files = glob.glob(exp) result_files.extend(files) ensure_path(target_dir) infofile = target_dir + '/merged_term_frequencies.info.txt' fh_info = codecs.open(infofile, 'w', encoding='utf-8') fh_info.write("git commit = %s\n\n" % get_git_commit()) for fname in result_files: fh_info.write(fname + u"\n") merge_result_files(target_dir, result_files)