def test_seqinfo(self):
    with tempfile.TemporaryFile() as tf, \
            open(config.data_path('minimal_taxonomy.csv')) as taxtable_fp, \
            open(config.data_path('minimal_add_taxonomy.csv')) as extra_nodes_fp:
        class _Args(object):
            extra_nodes_csv = extra_nodes_fp
            taxtable = taxtable_fp
            out_file = tf
            verbosity = 0
        self.assertFalse(add_to_taxtable.action(_Args()))
        # No output check at present
        self.assertTrue(tf.tell() > 0)
def test_rollforward(self):
    with config.tempdir() as scratch:
        rpkg = os.path.join(scratch, 'tostrip.refpkg')
        shutil.copytree(config.data_path('lactobacillus2-0.2.refpkg'), rpkg)
        r = refpkg.Refpkg(rpkg, create=False)
        original_contents = copy.deepcopy(r.contents)
        r.update_metadata('boris', 'hilda')
        r.update_metadata('meep', 'natasha')
        updated_contents = copy.deepcopy(r.contents)
        r.rollback()
        r.rollback()

        class _Args(object):
            refpkg = rpkg

            def __init__(self, n):
                self.n = n

        # Rolling forward more operations than were rolled back should fail
        # and leave the package untouched
        self.assertEqual(rollforward.action(_Args(3)), 1)
        r._sync_from_disk()
        self.assertEqual(r.contents['metadata'], original_contents['metadata'])
        self.assertEqual(rollforward.action(_Args(2)), 0)
        r._sync_from_disk()
        self.assertEqual(r.contents['metadata'], updated_contents['metadata'])
        self.assertEqual(r.contents['rollforward'], None)
        self.assertNotEqual(r.contents['rollback'], None)
def test_seqinfo(self):
    with tempfile.TemporaryFile() as tf, \
            open(config.data_path('simple_seqinfo.csv')) as ifp:
        class _Args(object):
            database_file = config.ncbi_master_db
            taxids = None
            taxnames = None
            seq_info = ifp
            out_file = tf
            verbosity = 0
        self.assertEqual(taxtable.action(_Args()), 0)
        # No output check at present
        self.assertTrue(tf.tell() > 0)
def test_action(self):
    with config.tempdir() as scratch:
        pkg_path = os.path.join(scratch, 'test.refpkg')
        r = refpkg.Refpkg(pkg_path)
        test_file = config.data_path('bv_refdata.csv')

        class _Args(object):
            refpkg = pkg_path
            changes = ['meep=' + test_file, 'hilda=' + test_file]
            metadata = False

        update.action(_Args())
        r._sync_from_disk()
        self.assertEqual(r.contents['files']['meep'], 'bv_refdata.csv')
        self.assertEqual(r.contents['files']['hilda'], 'bv_refdata.csv1')
def test_strip(self):
    with config.tempdir() as scratch:
        rpkg = os.path.join(scratch, 'tostrip.refpkg')
        shutil.copytree(config.data_path('lactobacillus2-0.2.refpkg'), rpkg)
        r = refpkg.Refpkg(rpkg, create=False)
        r.update_metadata('boris', 'hilda')
        r.update_metadata('meep', 'natasha')

        class _Args(object):
            refpkg = rpkg

        strip.action(_Args())
        r._sync_from_disk()
        self.assertEqual(r.contents['rollback'], None)
        self.assertEqual(r.contents['rollforward'], None)
def test_seqinfo(self):
    with tempfile.TemporaryFile() as tf, \
            open(config.data_path('simple_seqinfo.csv')) as ifp:
        class _Args(object):
            database_file = config.ncbi_master_db
            taxids = None
            taxnames = None
            seq_info = ifp
            out_file = tf
            verbosity = 0
            full = False
        self.assertEqual(taxtable.action(_Args()), 0)
        # No output check at present
        self.assertTrue(tf.tell() > 0)
def load(self, filename="hmm.json"):
    filename = data_path(filename)
    # close the handle when done instead of leaking it
    with open(filename, 'r') as fr:
        model = json.loads(fr.read())
    # build emit mat as (state, observation, probability) triples
    mat = model['emit_mat']
    data = []
    for state in STATES:
        for observe in mat[state]:
            data.append((state, observe, mat[state][observe]))
    self.emit_mat = self.context.parallelize(data)
    # build others
    self.trans_mat = model['trans_mat']
    self.init_vec = model['init_vec']
    self.state_count = model['state_count']
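# A minimal sketch of the hmm.json layout the loader above assumes; the
# concrete states and values below are made up for illustration (e.g. a
# B/M/E/S tagging scheme), not taken from this codebase:
#
# {
#     "emit_mat": {"B": {"a": 0.01, "b": 0.002}, "S": {"a": 0.03}},
#     "trans_mat": {"B": {"M": 0.4, "E": 0.6}},
#     "init_vec": {"B": 0.7, "S": 0.3},
#     "state_count": {"B": 12345, "S": 6789}
# }
#
# Only emit_mat is exploded into (state, observation, probability) triples
# for the RDD; the other three keys stay as plain Python structures.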
def test_seqinfo(self):
    with tempfile.TemporaryFile() as tf, \
            open(config.data_path('simple_seqinfo.csv')) as ifp:
        class _Args(object):
            url = 'sqlite:///' + config.ncbi_master_db
            schema = None
            valid = False
            ranked = False
            tax_ids = None
            taxnames = None
            seq_info = ifp
            out = tf
            verbosity = 0
            clade_ids = None
            taxtable = None
        self.assertIsNone(taxtable.action(_Args()))
        # No output check at present
        self.assertTrue(tf.tell() > 0)
def test_action(self):
    with config.tempdir() as scratch:
        pkg_path = os.path.join(scratch, 'test.refpkg')
        r = refpkg.Refpkg(pkg_path, create=True)
        test_file = config.data_path('bv_refdata.csv')

        class _Args(object):
            refpkg = pkg_path
            changes = ['meep=' + test_file, 'hilda=' + test_file]
            metadata = False

        update.action(_Args())
        r._sync_from_disk()
        self.assertEqual(r.contents['files']['meep'], 'bv_refdata.csv')
        # Second file should have been assigned a non-clashing name
        h = r.contents['files']['hilda']
        self.assertNotEqual(h, 'bv_refdata.csv')
        self.assertTrue(h.startswith('bv_refdata'))
        self.assertTrue(h.endswith('.csv'))
        self.assertTrue(os.path.exists(r.resource_path('hilda')))
def test_action(self):
    with config.tempdir() as scratch:
        pkg_path = os.path.join(scratch, 'test.refpkg')
        r = refpkg.Refpkg(pkg_path, create=True)
        test_file = config.data_path('bv_refdata.csv')
        self.args.refpkg = pkg_path
        self.args.changes = ['meep=' + test_file, 'hilda=' + test_file]
        update.action(self.args)
        r._sync_from_disk()
        self.assertEqual(r.contents['files']['meep'], 'bv_refdata.csv')
        # Second file should have been assigned a non-clashing name
        h = r.contents['files']['hilda']
        self.assertNotEqual(h, 'bv_refdata.csv')
        self.assertTrue(h.startswith('bv_refdata'))
        self.assertTrue(h.endswith('.csv'))
        self.assertTrue(os.path.exists(r.resource_path('hilda')))
def setUp(self):
    self.t1 = data_path('simple_taxtable.csv')
    self.t2 = data_path('taxids1.taxtable')
    self.parser = argparse.ArgumentParser()
    merge_taxtables.build_parser(self.parser)
    self.outfile = os.path.join(self.mkoutdir(), 'taxtable.csv')
class _Args(object):
    refpkg = config.data_path('lactobacillus2-0.2.refpkg')
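# _Args mimics the argparse.Namespace that the command-line front end would
# normally hand to an action(); a class with class attributes is the lightest
# way to fake that in tests. An equivalent sketch using the stdlib directly
# (the subcommand name here is hypothetical):
#
#     args = argparse.Namespace(
#         refpkg=config.data_path('lactobacillus2-0.2.refpkg'))
#     some_subcommand.action(args)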
    output = translateFile(TEMP_FILENAME)
    os.remove(TEMP_FILENAME)
    return output


def translateFile(input_filename, output_filename=None):
    """
    Translates a text file of sentences into a dictionary.
    """
    out = subprocess.check_output(['phonetisaurus-g2p',
                                   '--model=%s' % (G014B2B_FST),
                                   '--input=%s' % (input_filename),
                                   '--words',
                                   '--isfile'])
    out = parseOutput(out)
    if output_filename:
        out = '\n'.join(out)
        f = open(output_filename, "wb")
        f.write(out)
        f.close()
        return None
    return out


if __name__ == "__main__":
    translateFile(os.path.expanduser(data_path("/sentences.txt")),
                  os.path.expanduser(data_path("/dictionary.dic")))
class TestUpdateTaxids(config.TestBase):

    def main(self, arguments):
        taxit.main(['update_taxids'] + [str(a) for a in arguments])

    log_info = 'taxit update_taxids '

    thisdata_path = config.data_path('update_taxids', 'TestUpdateTaxids')
    seq_info = config.data_path(thisdata_path, 'seq_info.csv')
    small_taxonomy_db = 'sqlite:///' + config.data_path('small_taxonomy.db')

    def test01(self):
        """
        Minimal inputs
        """
        args = [self.seq_info, self.small_taxonomy_db]
        log.info(self.log_info + ' '.join(map(str, args)))
        # ValueError: Unknown or missing tax_ids present
        self.assertRaises(ValueError, self.main, args)

    def test02(self):
        """
        --ignore-unknowns
        """
        this_test = sys._getframe().f_code.co_name
        thisdata_path = self.thisdata_path
        ref = os.path.join(thisdata_path, this_test, 'update.csv')
        outdir = self.mkoutdir()
        out = os.path.join(outdir, 'update.csv')
        args = ['--ignore-unknowns',
                '--out', out,
                self.seq_info,
                self.small_taxonomy_db]
        log.info(self.log_info + ' '.join(map(str, args)))
        self.main(args)
        self.assertTrue(filecmp.cmp(out, ref))

    def test03(self):
        """
        --unknowns unknowns.csv
        """
        this_test = sys._getframe().f_code.co_name
        thisdata_path = self.thisdata_path
        ref_info = os.path.join(thisdata_path, this_test, 'update.csv')
        ref_unknowns = os.path.join(thisdata_path, this_test, 'unknowns.csv')
        outdir = self.mkoutdir()
        out_info = os.path.join(outdir, 'update.csv')
        out_unknowns = os.path.join(outdir, 'unknowns.csv')
        args = ['--unknowns', out_unknowns,
                '--out', out_info,
                self.seq_info,
                self.small_taxonomy_db]
        log.info(self.log_info + ' '.join(map(str, args)))
        self.main(args)
        self.assertTrue(filecmp.cmp(out_info, ref_info))
        self.assertTrue(filecmp.cmp(out_unknowns, ref_unknowns))

    def test04(self):
        """
        --ignore-unknowns --name-column tax_name
        """
        this_test = sys._getframe().f_code.co_name
        thisdata_path = self.thisdata_path
        ref_info = os.path.join(thisdata_path, this_test, 'update.csv')
        outdir = self.mkoutdir()
        out_info = os.path.join(outdir, 'update.csv')
        args = ['--ignore-unknowns',
                '--name-column', 'tax_name',
                '--out', out_info,
                self.seq_info,
                self.small_taxonomy_db]
        log.info(self.log_info + ' '.join(map(str, args)))
        self.main(args)
        self.assertTrue(filecmp.cmp(out_info, ref_info))
def load_data(self, filename):
    self.data = open(data_path(filename), 'r', encoding="utf-8")
    lab_files.sort()
    return zip(img_files, lab_files)


if __name__ == '__main__':
    if __package__ is None:
        import sys
        from os import path
        sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
    else:
        __package__ = ''

    import config

    datagen = CamVidGenerator(config.data_path())

    batch_size = 3
    target_size = (360, 480)

    for imgBatch, labelBatch in datagen.flow('train', batch_size, target_size):
        print(len(imgBatch))

        img = imgBatch[0]
        label = labelBatch[0]

        colored_class_image = datagen.one_hot_to_bgr(label, target_size,
                                                     datagen.n_classes, datagen.labels)

        cv2.imshow("img", img)
def read(self, filename):
    filepath = data_path(filename)
    self.raw_data = self.context.textFile(filepath)
def load(self, filename="words.txt"): filepath = data_path(filename) data = self.context.textFile(filepath) self.word_dict = data.map(lambda x: tuple(x.split(' ')))\ .map( lambda x: (x[0], int(x[1])) ).toDF(['word', 'num'])
def _stagepath(self):
    return data_path() + self._stage + "/"
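# A hedged alternative using os.path.join instead of string concatenation,
# assuming data_path() returns a directory path; the trailing '' component
# preserves the trailing slash the original returns:
#
#     def _stagepath(self):
#         return os.path.join(data_path(), self._stage, '')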
    return filenames


if __name__ == '__main__':
    if __package__ is None:
        import sys
        from os import path
        sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
    else:
        __package__ = ''

    import cityscapes_labels
    import config

    datagen = GTAGenerator(dataset_path=config.data_path())

    batch_size = 1
    target_size = 256, 512

    i = 3
    for img, label in datagen.flow('val', batch_size, target_size):
        print(i, img.shape, label.shape)

        colored_class_image = datagen.one_hot_to_bgr(label[0], target_size,
                                                     datagen.n_classes, datagen.labels)

        cv2.imshow("normalized", img[0])
        cv2.imshow("gt", colored_class_image)
        cv2.waitKey()
TODO: rewrite routines to avoid writing files to disk
TODO: use StringIO() instead of temp files
TODO: pass around file-like objects
"""

import os
import json
from wave import open as open_audio
import audioop
import pyaudio

from config import HM_DIR, data_path
import alteration

THRESHOLD_MULTIPLIER = 1.8
PASSIVE_AUDIO_FILE = data_path("passive.wav")
ACTIVE_AUDIO_FILE = data_path("active.wav")
RATE = 16000
CHUNK = 1024
# number of seconds to allow to establish threshold
THRESHOLD_TIME = 1

# quirky bug where first import doesn't work
try:
    import pocketsphinx as ps
except:
    import pocketsphinx as ps
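# A minimal sketch of how these constants plausibly fit together when
# establishing the ambient-noise threshold; _estimate_threshold and its loop
# are assumptions for illustration, not this module's actual API:


def _estimate_threshold(stream):
    """Hypothetical helper: listen for THRESHOLD_TIME seconds and scale
    the average RMS by THRESHOLD_MULTIPLIER to get a wake threshold."""
    windows = int(RATE / CHUNK * THRESHOLD_TIME)
    scores = []
    for _ in range(windows):
        frame = stream.read(CHUNK)
        scores.append(audioop.rms(frame, 2))  # width=2 for 16-bit samples
    return THRESHOLD_MULTIPLIER * sum(scores) / len(scores)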
                        help='Input file',
                        default='/home/mlyko/data/stuttgart_00.mp4')
    parser.add_argument('-o', '--output', help='Output file', default=None)
    args = parser.parse_args()
    return args


args = parse_arguments()
size = config.target_size()

videoEvaluator = VideoEvaluator()
videoEvaluator.select_device(args.gid)

datagen = CityscapesFlowGenerator(config.data_path())

videoEvaluator.load_model({
    'model': ICNet(config.target_size(), datagen.n_classes, for_training=False),
    'weights': config.weights_path() + 'city/rel/ICNet/1612:37e200.b8.lr-0.001000._dec-0.000000.of-farn.h5',
    'warp': False
})

videoEvaluator.load_model({
    'model': ICNetWarp0(config.target_size(), datagen.n_classes, for_training=False),
                      epochs, self.batch_size,
                      losswise_params['optimizer']['lr'],
                      losswise_params['optimizer']['decay'],
                      self._optical_flow_type)

        self.prepare_callbacks(run_name, epochs)

        self.model.k.fit_generator(generator=train_generator,
                                   steps_per_epoch=train_steps,
                                   epochs=epochs,
                                   initial_epoch=restart_epoch,
                                   verbose=1,
                                   validation_data=val_generator,
                                   validation_steps=val_steps,
                                   callbacks=self.train_callbacks,
                                   max_queue_size=max_queue,
                                   shuffle=not self.is_debug,
                                   use_multiprocessing=multiprocess,
                                   workers=workers)

        # save final model
        self.model.save_final(self.get_run_path(run_name, '../../weights/'), epochs)


if __name__ == '__main__':
    trainer = Trainer(model_name='mobile_unet',
                      dataset_path=config.data_path(),
                      target_size=(288, 480),
                      batch_size=2,
                      n_gpu=1,
                      debug_samples=0)
        y = [np.array(Y), np.array(Y2), np.array(Y3)]
        yield x, y


if __name__ == '__main__':
    if __package__ is None:
        import sys
        from os import path
        sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
    else:
        __package__ = ''

    import config

    datagen = CityscapesGeneratorForICNet(config.data_path(), flip_enabled=True)

    batch_size = 1
    # target_size = 288, 480
    target_size = 256, 512
    # target_size = 1024, 2048  # orig size

    for imgBatch, labelBatch in datagen.flow('train', batch_size, target_size):
        print(len(imgBatch))

        img = imgBatch[0][0]
        label = labelBatch[0][0]

        colored_class_image = datagen.one_hot_to_bgr(
            label, tuple(a // 4 for a in target_size), datagen.n_classes,