def build_data_pipeline(params, preload=True, test=False):
    """Ensure the CIFAR-10 binary files are present, then build a pipeline.

    Downloads and untars the CIFAR-10 binary archive into
    ``params['data_dir']`` if the expected files are missing, then delegates
    to ``_file_pipeline`` over either the train or the test file set.

    Args:
        params: configuration dict; ``params['data_dir']`` is the data root.
        preload: forwarded to ``_file_pipeline``.
        test: when True, build the pipeline over TEST_FILES instead of
            TRAIN_FILES.

    Returns:
        Whatever ``_file_pipeline`` returns for the chosen file set.
    """
    root = params['data_dir']
    train_paths = utils.join_files(root, TRAIN_FILES)
    eval_paths = utils.join_files(root, TEST_FILES)
    all_names = TRAIN_FILES + TEST_FILES
    # Fetch + extract only if needed; moveFiles relocates the extracted
    # archive contents from its internal folder into the data directory.
    utils.cond_wget_untar(
        root,
        train_paths + eval_paths,
        'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz',
        moveFiles=zip(utils.join_files('cifar-10-batches-bin', all_names),
                      utils.join_files(root, all_names)))
    # Images are 32x32x3 bytes, with an extra byte at the start for the label
    if test:
        return _file_pipeline(params, utils.join_files(root, TEST_FILES),
                              preload=preload, test=True)
    return _file_pipeline(params, utils.join_files(root, TRAIN_FILES),
                          preload=preload)
def build_data_pipeline(data_directory, batch_size, preload=True, test=False):
    """Ensure the CIFAR-10 binary files exist, then build a file pipeline.

    Downloads and untars the CIFAR-10 binary archive into ``data_directory``
    when the expected files are absent, then delegates to ``_file_pipeline``
    over the train or test split.

    Args:
        data_directory: root directory holding the CIFAR-10 binary files.
        batch_size: forwarded to ``_file_pipeline``.
        preload: forwarded to ``_file_pipeline``.
        test: when True, use TEST_FILES rather than TRAIN_FILES.

    Returns:
        The result of ``_file_pipeline`` for the selected split.
    """
    combined = TRAIN_FILES + TEST_FILES
    train_paths = utils.join_files(data_directory, TRAIN_FILES)
    eval_paths = utils.join_files(data_directory, TEST_FILES)
    # Conditional fetch: moveFiles maps archive-internal paths to their
    # destinations inside data_directory.
    utils.cond_wget_untar(
        data_directory,
        train_paths + eval_paths,
        'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz',
        moveFiles=zip(utils.join_files('cifar-10-batches-bin', combined),
                      utils.join_files(data_directory, combined)))
    # Images are 32x32x3 bytes, with an extra byte at the start for the label
    if test:
        return _file_pipeline(batch_size,
                              utils.join_files(data_directory, TEST_FILES),
                              preload=preload, test=True)
    return _file_pipeline(batch_size,
                          utils.join_files(data_directory, TRAIN_FILES),
                          preload=preload)
def make_output_dirs(self):
    """Build the export outputs for every enabled export setting.

    Clears old output/temp directories, zips the project into a .nw
    archive, then per platform either assembles a .app bundle (mac) or
    concatenates the runtime binary with the archive (windows/linux).
    Errors are captured into ``self.output_err`` rather than raised.
    Note: this revision appears to target Python 2 (uses ``unicode``).
    """
    self.output_err = ''
    try:
        self.progress_text = 'Removing old output directory...\n'
        output_dir = utils.path_join(self.output_dir(), self.project_name())
        if os.path.exists(output_dir):
            utils.rmtree(output_dir, ignore_errors=True)
        temp_dir = utils.path_join(TEMP_DIR, 'webexectemp')
        if os.path.exists(temp_dir):
            utils.rmtree(temp_dir, ignore_errors=True)
        self.progress_text = 'Making new directories...\n'
        # output_dir may still exist if rmtree silently failed above.
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        os.makedirs(temp_dir)
        self.copy_files_to_project_folder()
        json_file = utils.path_join(self.project_dir(), 'package.json')
        global_json = utils.get_data_file_path('files/global.json')
        if self.output_package_json:
            with codecs.open(json_file, 'w+', encoding='utf-8') as f:
                f.write(self.generate_json())
            with codecs.open(global_json, 'w+', encoding='utf-8') as f:
                f.write(self.generate_json(global_json=True))
        # .nw = zipped app payload; .nwf = same payload left uncompressed.
        zip_file = utils.path_join(temp_dir, self.project_name()+'.nw')
        app_nw_folder = utils.path_join(temp_dir, self.project_name()+'.nwf')
        utils.copytree(self.project_dir(), app_nw_folder,
                       ignore=shutil.ignore_patterns(output_dir))
        zip_files(zip_file, self.project_dir(), exclude_paths=[output_dir])
        for ex_setting in self.settings['export_settings'].values():
            if ex_setting.value:
                self.progress_text = '\n'
                name = ex_setting.display_name
                self.progress_text = u'Making files for {}...'.format(name)
                export_dest = utils.path_join(output_dir, ex_setting.name)
                # Project renamed node-webkit -> nwjs at v0.12.
                versions = re.findall('(\d+)\.(\d+)\.(\d+)', self.selected_version())[0]
                minor = int(versions[1])
                if minor >= 12:
                    export_dest = export_dest.replace('node-webkit', 'nwjs')
                if os.path.exists(export_dest):
                    utils.rmtree(export_dest, ignore_errors=True)
                # shutil will make the directory for us
                utils.copytree(get_data_path('files/'+ex_setting.name), export_dest,
                               ignore=shutil.ignore_patterns('place_holder.txt'))
                utils.rmtree(get_data_path('files/'+ex_setting.name), ignore_errors=True)
                self.progress_text += '.'
                if 'mac' in ex_setting.name:
                    uncomp_setting = self.get_setting('uncompressed_folder')
                    uncompressed = uncomp_setting.value
                    app_path = utils.path_join(export_dest, self.project_name()+'.app')
                    # Newer runtimes ship nwjs.app; fall back to the old name.
                    try:
                        utils.move(utils.path_join(export_dest, 'nwjs.app'), app_path)
                    except IOError:
                        utils.move(utils.path_join(export_dest, 'node-webkit.app'), app_path)
                    # Rewrite bundle metadata so Finder shows the project name/version.
                    plist_path = utils.path_join(app_path, 'Contents', 'Info.plist')
                    plist_dict = plistlib.readPlist(plist_path)
                    plist_dict['CFBundleDisplayName'] = self.project_name()
                    plist_dict['CFBundleName'] = self.project_name()
                    version_setting = self.get_setting('version')
                    plist_dict['CFBundleShortVersionString'] = version_setting.value
                    plist_dict['CFBundleVersion'] = version_setting.value
                    plistlib.writePlist(plist_dict, plist_path)
                    self.progress_text += '.'
                    # app.nw inside Resources is either a folder or the zip.
                    app_nw_res = utils.path_join(app_path, 'Contents', 'Resources', 'app.nw')
                    if uncompressed:
                        utils.copytree(app_nw_folder, app_nw_res)
                    else:
                        utils.copy(zip_file, app_nw_res)
                    self.create_icns_for_app(utils.path_join(app_path, 'Contents', 'Resources', 'nw.icns'))
                    self.progress_text += '.'
                else:
                    ext = ''
                    windows = False
                    if 'windows' in ex_setting.name:
                        ext = '.exe'
                        windows = True
                    nw_path = utils.path_join(export_dest, ex_setting.dest_files[0])
                    if windows:
                        self.replace_icon_in_exe(nw_path)
                    self.compress_nw(nw_path)
                    dest_binary_path = utils.path_join(export_dest, self.project_name() + ext)
                    if 'linux' in ex_setting.name:
                        self.make_desktop_file(dest_binary_path, export_dest)
                    # Self-contained executable = runtime binary + zipped payload.
                    join_files(dest_binary_path, nw_path, zip_file)
                    # 0755: owner rwx, group/other r-x.
                    sevenfivefive = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP |
                                     stat.S_IROTH | stat.S_IXOTH)
                    os.chmod(dest_binary_path, sevenfivefive)
                    self.progress_text += '.'
                    if os.path.exists(nw_path):
                        os.remove(nw_path)
    except Exception:
        # Collect the full traceback for the UI instead of crashing.
        error = u''.join([unicode(x) for x in
                          traceback.format_exception(sys.exc_info()[0],
                                                     sys.exc_info()[1],
                                                     sys.exc_info()[2])])
        self.logger.error(error)
        self.output_err += error
    finally:
        utils.rmtree(temp_dir, ignore_errors=True)
def make_output_dirs(self):
    """Create export outputs for each enabled export setting.

    Zips the project into a .nw archive, then per platform either builds
    a .app bundle (mac) or joins the runtime binary with the archive.
    Exceptions are recorded in ``self.output_err`` instead of propagating.
    NOTE(review): the progress text says the old output directory is
    removed, but only the temp directory is removed here — confirm intent.
    """
    self.output_err = ''
    try:
        self.progress_text = 'Removing old output directory...\n'
        output_dir = os.path.join(self.output_dir(), self.project_name())
        temp_dir = os.path.join(TEMP_DIR, 'webexectemp')
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        self.progress_text = 'Making new directories...\n'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        os.makedirs(temp_dir)
        self.copy_files_to_project_folder()
        json_file = os.path.join(self.project_dir(), 'package.json')
        if self.output_package_json:
            with open(json_file, 'w+') as f:
                f.write(self.generate_json())
        zip_file = os.path.join(temp_dir, self.project_name() + '.nw')
        zip_files(zip_file, self.project_dir(), exclude_paths=[output_dir])
        for ex_setting in self.settings['export_settings'].values():
            if ex_setting.value:
                self.progress_text = '\n'
                name = ex_setting.display_name
                self.progress_text = 'Making files for {}...'.format(name)
                export_dest = os.path.join(output_dir, ex_setting.name)
                # Runtime renamed node-webkit -> nwjs at version 0.12.
                versions = re.findall('(\d+)\.(\d+)\.(\d+)',
                                      self.selected_version())[0]
                minor = int(versions[1])
                if minor >= 12:
                    export_dest = export_dest.replace(
                        'node-webkit', 'nwjs')
                if os.path.exists(export_dest):
                    shutil.rmtree(export_dest)
                # shutil will make the directory for us
                shutil.copytree(
                    os.path.join('files', ex_setting.name),
                    export_dest,
                    ignore=shutil.ignore_patterns('place_holder.txt'))
                # NOTE(review): removes the source template dir after copy —
                # presumably it is re-downloaded per export; verify.
                shutil.rmtree(os.path.join('files', ex_setting.name))
                self.progress_text += '.'
                if 'mac' in ex_setting.name:
                    app_path = os.path.join(export_dest,
                                            self.project_name() + '.app')
                    # Newer runtimes ship nwjs.app; fall back to old name.
                    try:
                        shutil.move(os.path.join(export_dest, 'nwjs.app'),
                                    app_path)
                    except IOError:
                        shutil.move(
                            os.path.join(export_dest, 'node-webkit.app'),
                            app_path)
                    # Rewrite bundle metadata with project name and version.
                    plist_path = os.path.join(app_path, 'Contents',
                                              'Info.plist')
                    plist_dict = plistlib.readPlist(plist_path)
                    plist_dict['CFBundleDisplayName'] = self.project_name()
                    plist_dict['CFBundleName'] = self.project_name()
                    version_setting = self.get_setting('version')
                    plist_dict[
                        'CFBundleShortVersionString'] = version_setting.value
                    plist_dict['CFBundleVersion'] = version_setting.value
                    plistlib.writePlist(plist_dict, plist_path)
                    self.progress_text += '.'
                    shutil.copy(
                        zip_file,
                        os.path.join(app_path, 'Contents', 'Resources',
                                     'app.nw'))
                    self.create_icns_for_app(
                        os.path.join(app_path, 'Contents', 'Resources',
                                     'nw.icns'))
                    self.progress_text += '.'
                else:
                    ext = ''
                    windows = False
                    if 'windows' in ex_setting.name:
                        ext = '.exe'
                        windows = True
                    nw_path = os.path.join(export_dest,
                                           ex_setting.dest_files[0])
                    if windows:
                        self.replace_icon_in_exe(nw_path)
                    #self.compress_nw(nw_path)
                    # Self-contained executable: runtime binary + zip payload.
                    dest_binary_path = os.path.join(
                        export_dest, self.project_name() + ext)
                    join_files(dest_binary_path, nw_path, zip_file)
                    # 0755 permissions on the produced executable.
                    sevenfivefive = (stat.S_IRWXU |
                                     stat.S_IRGRP | stat.S_IXGRP |
                                     stat.S_IROTH | stat.S_IXOTH)
                    os.chmod(dest_binary_path, sevenfivefive)
                    self.progress_text += '.'
                    if os.path.exists(nw_path):
                        os.remove(nw_path)
    except Exception:
        # Surface the full traceback to the UI via output_err.
        exc = traceback.format_exception(sys.exc_info()[0],
                                         sys.exc_info()[1],
                                         sys.exc_info()[2])
        self.output_err += ''.join(exc)
    finally:
        shutil.rmtree(temp_dir)
def makeOutputDirs(self):
    """Export the project for every enabled export setting.

    Zips the project to a .nw archive, then per platform either moves the
    node-webkit .app bundle into place (mac) or joins the runtime binary
    with the archive. Errors are accumulated in ``self.output_err``.
    """
    self.output_err = ''
    try:
        self.progress_text = 'Removing old output directory...'
        outputDir = os.path.join(self.outputDir(), self.projectName())
        tempDir = os.path.join(TEMP_DIR, 'webexectemp')
        if os.path.exists(tempDir):
            shutil.rmtree(tempDir)
        self.progress_text = 'Making new directories...'
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)
        os.makedirs(tempDir)
        self.copyFilesToProjectFolder()
        json_file = os.path.join(self.projectDir(), 'package.json')
        with open(json_file, 'w+') as f:
            f.write(self.generate_json())
        zip_file = os.path.join(tempDir, self.projectName()+'.nw')
        zip_files(zip_file, self.projectDir(), exclude_paths=[outputDir])
        for ex_setting in self.export_settings.values():
            if ex_setting.value:
                self.progress_text = 'Making files for {}'.format(ex_setting.display_name)
                export_dest = os.path.join(outputDir, ex_setting.name)
                if os.path.exists(export_dest):
                    shutil.rmtree(export_dest)
                #shutil will make the directory for us
                shutil.copytree(os.path.join('files', ex_setting.name), export_dest)
                self.progress_text += '.'
                if ex_setting.name == 'mac':
                    app_path = os.path.join(export_dest, self.projectName()+'.app')
                    # NOTE(review): enc_app_path/enc_export_dest are computed
                    # but never used — likely leftovers; confirm and remove.
                    enc_app_path = app_path.encode('utf8')
                    enc_export_dest = export_dest.encode('utf8')
                    shutil.move(os.path.join(export_dest, 'node-webkit.app'), app_path)
                    self.progress_text += '.'
                    shutil.copy(zip_file, os.path.join(app_path, 'Contents', 'Resources', 'app.nw'))
                    self.progress_text += '.'
                else:
                    ext = ''
                    if ex_setting.name == 'windows':
                        ext = '.exe'
                    nw_path = os.path.join(export_dest, ex_setting.dest_files[0])
                    dest_binary_path = os.path.join(export_dest, self.projectName()+ext)
                    # Patch the binary's libudev reference for newer distros.
                    # NOTE(review): shells out via os.system with an
                    # unquoted path — breaks on spaces; consider subprocess.
                    if ex_setting.name == 'linux-x64-NewLib':
                        command = 'sed -i s/udev.so.0/udev.so.1/g ' + nw_path
                        os.system(command.encode('utf8'))
                    elif ex_setting.name == 'linux-x32-NewLib':
                        command = 'sed -i s/udev.so.0/udev.so.1/g ' + nw_path
                        os.system(command.encode('utf8'))
                    # Self-contained executable: runtime binary + zip payload.
                    join_files(os.path.join(export_dest, self.projectName()+ext), nw_path, zip_file)
                    # 0755 permissions on the produced executable.
                    sevenfivefive = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
                    os.chmod(dest_binary_path, sevenfivefive)
                    self.progress_text += '.'
                    if os.path.exists(nw_path):
                        os.remove(nw_path)
    except Exception as e:
        # Record the full traceback for display instead of raising.
        self.output_err += ''.join(traceback.format_exception(sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2]))
    finally:
        shutil.rmtree(tempDir)
def makeOutputDirs(self):
    """Export the project for every enabled export setting.

    Zips the project into a .nw archive, then per platform either moves
    the node-webkit .app bundle into place (mac) or concatenates the
    runtime binary with the archive. Errors go to ``self.output_err``.
    """
    self.output_err = ''
    try:
        self.progress_text = 'Removing old output directory...'
        outputDir = os.path.join(self.outputDir(), self.projectName())
        tempDir = os.path.join(TEMP_DIR, 'webexectemp')
        if os.path.exists(tempDir):
            shutil.rmtree(tempDir)
        self.progress_text = 'Making new directories...'
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)
        os.makedirs(tempDir)
        self.copyFilesToProjectFolder()
        json_file = os.path.join(self.projectDir(), 'package.json')
        with open(json_file, 'w+') as f:
            f.write(self.generate_json())
        zip_file = os.path.join(tempDir, self.projectName()+'.nw')
        zip_files(zip_file, self.projectDir(), exclude_paths=[outputDir])
        for ex_setting in self.export_settings.values():
            if ex_setting.value:
                self.progress_text = 'Making files for {}'.format(ex_setting.display_name)
                export_dest = os.path.join(outputDir, ex_setting.name)
                if os.path.exists(export_dest):
                    shutil.rmtree(export_dest)
                #shutil will make the directory for us
                shutil.copytree(os.path.join('files', ex_setting.name), export_dest)
                self.progress_text += '.'
                if ex_setting.name == 'mac':
                    # Place the runtime bundle under the project's name and
                    # drop the zipped payload into its Resources folder.
                    app_path = os.path.join(export_dest, self.projectName()+'.app')
                    shutil.move(os.path.join(export_dest, 'node-webkit.app'), app_path)
                    self.progress_text += '.'
                    shutil.copy(zip_file, os.path.join(app_path, 'Contents', 'Resources', 'app.nw'))
                    self.progress_text += '.'
                else:
                    ext = ''
                    if ex_setting.name == 'windows':
                        ext = '.exe'
                    nw_path = os.path.join(export_dest, ex_setting.dest_files[0])
                    dest_binary_path = os.path.join(export_dest, self.projectName()+ext)
                    # Self-contained executable: runtime binary + zip payload.
                    # NOTE(review): first argument duplicates
                    # dest_binary_path — could simply pass that variable.
                    join_files(os.path.join(export_dest, self.projectName()+ext), nw_path, zip_file)
                    # 0755 permissions on the produced executable.
                    sevenfivefive = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
                    os.chmod(dest_binary_path, sevenfivefive)
                    self.progress_text += '.'
                    if os.path.exists(nw_path):
                        os.remove(nw_path)
    except Exception as e:
        # Record the full traceback for display instead of raising.
        self.output_err += ''.join(traceback.format_exception(sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2]))
    finally:
        shutil.rmtree(tempDir)
# Generate the csvs that contain the features associated to the triples processes = [Process(target=worker, args=(i, )) for i in range(N_THREADS)] for p in processes: p.start() for p in processes: p.join() if "--stop-after-generate" in sys.argv: sys.exit() # Join the csvs that have been generated by the threads print("Joining csvs") train_out = f"output/{DATASET}/train.csv" test_out = f"output/{DATASET}/test.csv" join_files(train_out, train_out + ".*") join_files(test_out, test_out + ".*") # Remove features that always have the same value and are thus useless print("Removing useless features") filter_features(train_out, test_out) rels_to_study = None rels_study_path = f"datasets/{DATASET}/relations_to_study.txt" if isfile(rels_study_path): rels_to_study = [] with open(rels_study_path, "r") as f: for line in f: if line: rels_to_study.append(line.strip().split("\t")[0])
def main():
    """Build a KenLM language model and DeepSpeech scorer for LANG.

    Pipeline: download raw text -> ungzip -> prepare/count words in
    parallel -> lmplz (arpa) -> filter by vocabulary -> build_binary ->
    package a .scorer. Each stage is skipped when its output already
    exists unless an earlier stage ran (tracked via ``redo``) or the
    matching ARGS.force_* flag is set.
    """
    # All artifacts live under the language's model directory.
    alphabet_txt = os.path.join(LANG.model_dir, 'alphabet.txt')
    raw_txt_gz = os.path.join(LANG.model_dir, 'raw.txt.gz')
    unprepared_txt = os.path.join(LANG.model_dir, 'unprepared.txt')
    prepared_txt = os.path.join(LANG.model_dir, 'prepared.txt')
    vocabulary_txt = os.path.join(LANG.model_dir, 'vocabulary.txt')
    unfiltered_arpa = os.path.join(LANG.model_dir, 'unfiltered.arpa')
    filtered_arpa = os.path.join(LANG.model_dir, 'filtered.arpa')
    lm_binary = os.path.join(LANG.model_dir, 'lm.binary')
    kenlm_scorer = os.path.join(LANG.model_dir, 'kenlm.scorer')
    temp_prefix = os.path.join(LANG.model_dir, 'tmp')

    section('Writing alphabet file', empty_lines_before=1)
    with open(alphabet_txt, 'w', encoding='utf-8') as alphabet_file:
        alphabet_file.write('\n'.join(LANG.alphabet) + '\n')

    redo = ARGS.force_download

    section('Downloading text data')
    redo = maybe_download(LANG.text_url, raw_txt_gz, force=redo)

    section('Unzipping text data')
    redo = maybe_ungzip(raw_txt_gz, unprepared_txt, force=redo)

    redo = redo or ARGS.force_prepare

    section('Preparing text and building vocabulary')
    if redo or not os.path.isfile(prepared_txt) or not os.path.isfile(vocabulary_txt):
        redo = True
        announce('Preparing {} shards of "{}"...'.format(ARGS.workers, unprepared_txt))
        # One counter process per shard feeds word counts to a single
        # aggregator via a bounded queue.
        counters = Queue(ARGS.workers)
        source_bytes = os.path.getsize(unprepared_txt)
        aggregator_process = Process(target=aggregate_counters,
                                     args=(vocabulary_txt, source_bytes, counters))
        aggregator_process.start()
        counter_processes = list(map(lambda index: Process(target=count_words,
                                                           args=(index, counters)),
                                     range(ARGS.workers)))
        try:
            for p in counter_processes:
                p.start()
            for p in counter_processes:
                p.join()
            # Sentinel tells the aggregator that all shards are done.
            counters.put(STOP_TOKEN)
            aggregator_process.join()
            print('')
            # Merge the per-shard prepared-text partials, then clean up.
            partials = list(map(lambda i: get_partial_path(i), range(ARGS.workers)))
            join_files(partials, prepared_txt)
            for partial in partials:
                os.unlink(partial)
        except KeyboardInterrupt:
            aggregator_process.terminate()
            for p in counter_processes:
                p.terminate()
            raise
    else:
        announce('Files "{}" and \n\t"{}" existing - not preparing'.format(prepared_txt, vocabulary_txt))

    redo = redo or ARGS.force_generate

    section('Building unfiltered language model')
    if redo or not os.path.isfile(unfiltered_arpa):
        redo = True
        # kenlm lmplz: estimate an n-gram LM limited to our vocabulary.
        lmplz_args = [
            KENLM_BIN + '/lmplz',
            '--temp_prefix', temp_prefix,
            '--memory', '80%',
            '--discount_fallback',
            '--limit_vocab_file', vocabulary_txt,
            '--text', prepared_txt,
            '--arpa', unfiltered_arpa,
            '--skip', 'symbols',
            '--order', str(LANG.order)
        ]
        if len(LANG.prune) > 0:
            lmplz_args.append('--prune')
            lmplz_args.extend(list(map(str, LANG.prune)))
        subprocess.check_call(lmplz_args)
    else:
        announce('File "{}" existing - not generating'.format(unfiltered_arpa))

    section('Filtering language model')
    if redo or not os.path.isfile(filtered_arpa):
        redo = True
        # kenlm filter reads the vocabulary on stdin and drops n-grams
        # containing out-of-vocabulary words.
        with open(vocabulary_txt, 'rb') as vocabulary_file:
            vocabulary_content = vocabulary_file.read()
        subprocess.run([
            KENLM_BIN + '/filter',
            'single',
            'model:' + unfiltered_arpa,
            filtered_arpa
        ], input=vocabulary_content, check=True)
    else:
        announce('File "{}" existing - not filtering'.format(filtered_arpa))

    section('Generating binary representation')
    if redo or not os.path.isfile(lm_binary):
        redo = True
        # Quantized trie binary for fast loading at inference time.
        subprocess.check_call([
            KENLM_BIN + '/build_binary',
            '-a', '255', '-q', '8', '-v', 'trie',
            filtered_arpa, lm_binary
        ])
    else:
        announce('File "{}" existing - not generating'.format(lm_binary))

    section('Building scorer')
    if redo or not os.path.isfile(kenlm_scorer):
        redo = True
        words = set()
        # Heuristic: a vocabulary of only 1-char tokens suggests a
        # character-based (e.g. CJK) model, which wants UTF-8 mode.
        vocab_looks_char_based = True
        with open(vocabulary_txt) as vocabulary_file:
            for line in vocabulary_file:
                for word in line.split():
                    words.add(word.encode())
                    if len(word) > 1:
                        vocab_looks_char_based = False
        announce("{} unique words read from vocabulary file.".format(len(words)))
        announce(
            "{} like a character based model.".format(
                "Looks" if vocab_looks_char_based else "Doesn't look"
            )
        )
        if ARGS.alphabet_mode == 'auto':
            use_utf8 = vocab_looks_char_based
        elif ARGS.alphabet_mode == 'utf8':
            use_utf8 = True
        else:
            use_utf8 = False
        serialized_alphabet = get_serialized_utf8_alphabet() if use_utf8 else LANG.get_serialized_alphabet()
        from ds_ctcdecoder import Scorer, Alphabet
        alphabet = Alphabet()
        err = alphabet.deserialize(serialized_alphabet, len(serialized_alphabet))
        if err != 0:
            announce('Error loading alphabet: {}'.format(err))
            sys.exit(1)
        scorer = Scorer()
        scorer.set_alphabet(alphabet)
        scorer.set_utf8_mode(use_utf8)
        scorer.reset_params(LANG.alpha, LANG.beta)
        scorer.load_lm(lm_binary)
        scorer.fill_dictionary(list(words))
        # The scorer package = the LM binary with the trie appended.
        shutil.copy(lm_binary, kenlm_scorer)
        scorer.save_dictionary(kenlm_scorer, True)  # append, not overwrite
        announce('Package created in {}'.format(kenlm_scorer))
        announce('Testing package...')
        # Smoke-test: reload the freshly written package.
        scorer = Scorer()
        scorer.load_lm(kenlm_scorer)
    else:
        announce('File "{}" existing - not building'.format(kenlm_scorer))
import utils as ut

# Driver script: all real work happens in the utils module.
# NOTE(review): semantics below are inferred from names only — confirm
# against the utils module.
ut.create_network()
# Presumably maps seed gene identifiers to Entrez IDs.
ut.translate("seed_genes", "sg_entrez")
# Presumably intersects the Entrez seed genes with the DIAMOnD results.
ut.join_files("sg_entrez", "diamond", "intersection_list")
# Report top results of the GO and pathway over-representation analyses.
ut.top_ten("go_ora")
ut.top_ten("path_ora")
def make_output_dirs(self):
    """Build export outputs for every enabled export setting.

    Zips the project into a .nw archive, then per platform either builds
    a .app bundle (mac) or joins the runtime binary with the archive.
    Exceptions are captured into ``self.output_err`` rather than raised.
    NOTE(review): despite the progress text, the old output directory is
    not removed here — only the temp directory; confirm intent.
    """
    self.output_err = ''
    try:
        self.progress_text = 'Removing old output directory...\n'
        output_dir = os.path.join(self.output_dir(), self.project_name())
        temp_dir = os.path.join(TEMP_DIR, 'webexectemp')
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        self.progress_text = 'Making new directories...\n'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        os.makedirs(temp_dir)
        self.copy_files_to_project_folder()
        json_file = os.path.join(self.project_dir(), 'package.json')
        if self.output_package_json:
            with open(json_file, 'w+') as f:
                f.write(self.generate_json())
        zip_file = os.path.join(temp_dir, self.project_name()+'.nw')
        zip_files(zip_file, self.project_dir(), exclude_paths=[output_dir])
        for ex_setting in self.settings['export_settings'].values():
            if ex_setting.value:
                self.progress_text = '\n'
                name = ex_setting.display_name
                self.progress_text = 'Making files for {}...'.format(name)
                export_dest = os.path.join(output_dir, ex_setting.name)
                # Runtime renamed node-webkit -> nwjs at version 0.12.
                versions = re.findall('(\d+)\.(\d+)\.(\d+)', self.selected_version())[0]
                minor = int(versions[1])
                if minor >= 12:
                    export_dest = export_dest.replace('node-webkit', 'nwjs')
                if os.path.exists(export_dest):
                    shutil.rmtree(export_dest)
                # shutil will make the directory for us
                shutil.copytree(os.path.join('files', ex_setting.name), export_dest,
                                ignore=shutil.ignore_patterns('place_holder.txt'))
                # NOTE(review): deletes the source template dir after the
                # copy — presumably re-fetched per export; verify.
                shutil.rmtree(os.path.join('files', ex_setting.name))
                self.progress_text += '.'
                if 'mac' in ex_setting.name:
                    app_path = os.path.join(export_dest, self.project_name()+'.app')
                    # Newer runtimes ship nwjs.app; fall back to old name.
                    try:
                        shutil.move(os.path.join(export_dest, 'nwjs.app'), app_path)
                    except IOError:
                        shutil.move(os.path.join(export_dest, 'node-webkit.app'), app_path)
                    self.progress_text += '.'
                    shutil.copy(zip_file, os.path.join(app_path, 'Contents', 'Resources', 'app.nw'))
                    self.create_icns_for_app(os.path.join(app_path, 'Contents', 'Resources', 'nw.icns'))
                    self.progress_text += '.'
                else:
                    ext = ''
                    windows = False
                    if 'windows' in ex_setting.name:
                        ext = '.exe'
                        windows = True
                    nw_path = os.path.join(export_dest, ex_setting.dest_files[0])
                    if windows:
                        self.replace_icon_in_exe(nw_path)
                    #self.compress_nw(nw_path)
                    # Self-contained executable: runtime binary + zip payload.
                    dest_binary_path = os.path.join(export_dest, self.project_name() + ext)
                    join_files(dest_binary_path, nw_path, zip_file)
                    # 0755 permissions on the produced executable.
                    sevenfivefive = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP |
                                     stat.S_IROTH | stat.S_IXOTH)
                    os.chmod(dest_binary_path, sevenfivefive)
                    self.progress_text += '.'
                    if os.path.exists(nw_path):
                        os.remove(nw_path)
    except Exception:
        # Surface the full traceback to the UI via output_err.
        exc = traceback.format_exception(sys.exc_info()[0],
                                         sys.exc_info()[1],
                                         sys.exc_info()[2])
        self.output_err += ''.join(exc)
    finally:
        shutil.rmtree(temp_dir)