def main(dirs):
    """
    Write a data-free description file for each PCBA JSON file.

    Every ``*.json.gz`` file found directly inside each directory in ``dirs``
    is parsed with ``PcbaJsonParser``. The ``['PC_AssaySubmit']['data']``
    entry is removed from the parsed tree and the remainder is written to
    ``<aid>-desc.json.gz`` in the same directory, where ``<aid>`` is the value
    returned by the parser's ``get_aid()``.

    Parameters
    ----------
    dirs : iterable of str
        Directories to scan. Each one is printed before it is processed.

    Raises
    ------
    KeyError
        If a parsed tree has no ``['PC_AssaySubmit']['data']`` entry. A hint
        about the expected format is printed before the error propagates.
    """
    desc_suffix = '-desc.json.gz'
    for this_dir in dirs:
        print(this_dir)
        for filename in glob.glob(os.path.join(this_dir, '*.json.gz')):
            # Output files are written next to the inputs and also match the
            # glob above. Skip them so that a second run does not fail on
            # files whose data section has already been removed.
            if filename.endswith(desc_suffix):
                continue
            parser = PcbaJsonParser(filename)
            tree = parser.tree
            aid = parser.get_aid()
            try:
                del tree['PC_AssaySubmit']['data']
            except KeyError:
                print('JSON is not properly formatted. Please follow NCBI FTP format.')
                # A bare raise keeps the original traceback intact.
                raise
            payload = json.dumps(tree, indent=2)
            # The gzip handle is binary. json.dumps returns bytes on Python 2
            # and text on Python 3, so encode only when it is text.
            if not isinstance(payload, bytes):
                payload = payload.encode('utf-8')
            output_path = os.path.join(this_dir, '{}{}'.format(aid, desc_suffix))
            with gzip.open(output_path, 'wb') as f:
                f.write(payload)
def main(dirs, output_filename):
    """
    Write a tab-separated table mapping AIDs to their single target mol_id.

    Every ``*.json.gz`` file found directly inside each directory in ``dirs``
    is parsed with ``PcbaJsonParser``. Assays whose ``get_target()`` result is
    ``None``, empty, or has more than one entry are skipped, as are assays
    whose single target has no ``'mol_id'`` key. For the rest, one
    ``<aid>\\t<mol_id>`` line is written to ``output_filename``.

    Parameters
    ----------
    dirs : iterable of str
        Directories to scan. Each one is printed before it is processed.
    output_filename : str
        Path of the file to write. It is created or truncated even when no
        targets are found.
    """
    targets = []
    aids = []
    for this_dir in dirs:
        print(this_dir)
        for filename in glob.glob(os.path.join(this_dir, '*.json.gz')):
            parser = PcbaJsonParser(filename)
            aid = parser.get_aid()
            target = parser.get_target()
            # Only assays with exactly one target are recorded. An empty
            # target list is skipped as well; indexing it below would raise
            # an uncaught IndexError.
            if target is None or len(target) != 1:
                continue
            try:
                mol_id = target[0]['mol_id']
            except KeyError:
                print('\tAID {} target has no mol_id'.format(aid))
                continue
            print('\tAID {} => {}'.format(aid, mol_id))
            targets.append(mol_id)
            aids.append(aid)
    print('Found {} targets'.format(len(aids)))
    with open(output_filename, 'wb') as f:
        for aid, target in zip(aids, targets):
            line = '{}\t{}\n'.format(aid, target)
            # The file is opened in binary mode. The formatted line is bytes
            # on Python 2 and text on Python 3, so encode only when it is text.
            if not isinstance(line, bytes):
                line = line.encode('utf-8')
            f.write(line)
Esempio n. 3
0
 def setUp(self):
   """
   Create the parser fixtures used by the tests.

   Each fixture is a PcbaJsonParser built from a file under the ``data``
   directory located next to this module. ``confirmatory`` refers to the
   same parser object as ``no_target``.
   """
   self.data_dir = os.path.dirname(os.path.realpath(__file__))

   def load(relative_path):
     # Build a parser for a fixture path given relative to this module.
     return PcbaJsonParser(os.path.join(self.data_dir, relative_path))

   self.parser = load('data/aid490.json')
   self.no_target = load('data/aid1.json')
   self.confirmatory = self.no_target
   self.multiple_target = load('data/aid429.json')
   self.gzip_parser = load('data/aid490.json.gz')
   self.rest_parser = load('data/aid1-rest.json')
   self.data_parser = load('data/999.json.gz')
   self.target_keys = ['name', 'mol_id', 'molecule_type', 'organism']
Esempio n. 4
0
 def setUp(self):
   """
   Create a PcbaPandasHandler and a PcbaJsonParser for ``data/aid1.json``.
   """
   self.handler = PcbaPandasHandler()
   module_path = os.path.realpath(__file__)
   self.data_dir = os.path.dirname(module_path)
   fixture_path = os.path.join(self.data_dir, 'data/aid1.json')
   self.parser = PcbaJsonParser(fixture_path)