def main(dirs):
    """
    Write a description-only copy of each PCBA JSON file found in dirs.

    Every ``*.json.gz`` file directly inside each directory is parsed, the
    ``data`` entry under ``PC_AssaySubmit`` is deleted from the parsed tree
    (in place), and the remaining tree is saved in the same directory as
    ``<aid>-desc.json.gz``.

    Parameters
    ----------
    dirs : iterable of str
        Directories to scan.

    Raises
    ------
    KeyError
        If a parsed file has no ``PC_AssaySubmit`` / ``data`` entry.
    """
    desc_suffix = '-desc.json.gz'
    for this_dir in dirs:
        print(this_dir)
        for filename in glob.glob(os.path.join(this_dir, '*.json.gz')):
            # Files written by an earlier run sit in the same directory and
            # match the input glob. Their 'data' entry is already gone, so
            # re-processing them would only fail with KeyError.
            if filename.endswith(desc_suffix):
                continue
            parser = PcbaJsonParser(filename)
            tree = parser.tree
            aid = parser.get_aid()
            try:
                del tree['PC_AssaySubmit']['data']
            except KeyError:
                print('JSON is not properly formatted. '
                      'Please follow NCBI FTP format.')
                # Bare raise keeps the traceback of the failing lookup.
                raise
            out_path = os.path.join(this_dir, '{}-desc.json.gz'.format(aid))
            with gzip.open(out_path, 'wb') as f:
                # The handle is binary, so hand it bytes explicitly; the
                # default ensure_ascii output makes this encoding lossless.
                f.write(json.dumps(tree, indent=2).encode('utf-8'))
def main(dirs, output_filename):
    """
    Write a tab-separated table of AIDs and their single target mol_id.

    Every ``*.json.gz`` file directly inside each directory is parsed. Only
    assays whose target list holds exactly one entry, and whose entry has a
    ``mol_id`` key, are recorded. One ``<aid>\\t<mol_id>`` line per recorded
    assay is written to output_filename.

    Parameters
    ----------
    dirs : iterable of str
        Directories to scan.
    output_filename : str
        Path of the file to write. It is created (or truncated) even when
        no assay qualifies.
    """
    rows = []
    for this_dir in dirs:
        print(this_dir)
        for filename in glob.glob(os.path.join(this_dir, '*.json.gz')):
            parser = PcbaJsonParser(filename)
            aid = parser.get_aid()
            target = parser.get_target()
            # Require exactly one target. An empty list must be skipped as
            # well: indexing it below would raise an uncaught IndexError.
            if target is None or len(target) != 1:
                continue
            try:
                mol_id = target[0]['mol_id']
            except KeyError:
                print('\tAID {} target has no mol_id'.format(aid))
                continue
            print('\tAID {} => {}'.format(aid, mol_id))
            rows.append((aid, mol_id))
    print('Found {} targets'.format(len(rows)))
    with open(output_filename, 'wb') as f:
        for aid, mol_id in rows:
            # The handle is binary, so encode each row before writing.
            f.write('{}\t{}\n'.format(aid, mol_id).encode('utf-8'))
def setUp(self):
    """
    Build the parser fixtures shared by the tests.
    """
    self.data_dir = os.path.dirname(os.path.realpath(__file__))

    def load(relative_path):
        # Parse a fixture addressed relative to this file's directory.
        return PcbaJsonParser(os.path.join(self.data_dir, relative_path))

    self.parser = load('data/aid490.json')
    self.no_target = load('data/aid1.json')
    # Second name for the same parser object; no extra file is parsed.
    self.confirmatory = self.no_target
    self.multiple_target = load('data/aid429.json')
    self.gzip_parser = load('data/aid490.json.gz')
    self.rest_parser = load('data/aid1-rest.json')
    self.data_parser = load('data/999.json.gz')
    self.target_keys = ['name', 'mol_id', 'molecule_type', 'organism']
def setUp(self):
    """
    Create the handler under test and a parser for the aid1 fixture.
    """
    self.handler = PcbaPandasHandler()
    self.data_dir = os.path.dirname(os.path.realpath(__file__))
    # Fixture path is relative to the directory holding this file.
    fixture_path = os.path.join(self.data_dir, 'data/aid1.json')
    self.parser = PcbaJsonParser(fixture_path)