output_file.write('\t'.join(output) + '\n') # Enough? num_processed += 1 if limit and num_processed >= limit: break return num_processed if __name__ == '__main__': parser = argparse.ArgumentParser('Vendor Compound Standardiser (Enamine)') parser.add_argument('vendor_dir', help='The Enamine vendor directory,' ' containing the ".gz" files to be processed.') parser.add_argument('vendor_prefix', help='The Enamine vendor file prefix,' ' i.e. "June2018". Only files with this prefix' ' in the vendor directory will be processed') parser.add_argument('output', help='The output directory') parser.add_argument('--output-is-prefix', action='store_true', help='Use the output as filename prefix rather than' ' a directory. This is useful in nextflow' ' workflows') parser.add_argument('-l', '--limit', type=int, default=0,
num_vendor_molecule_failures += 1 traceback.print_exc() # Enough? num_processed += 1 if limit and num_processed >= limit: break return num_processed if __name__ == '__main__': parser = argparse.ArgumentParser('Vendor Compound Standardiser (SDF)') parser.add_argument('vendor_dir', help='The SDF vendor directory,' ' containing the ".gz" files to be processed.') parser.add_argument('vendor_prefix', help='The SDF vendor file prefix,' ' i.e. "iis_smiles". Only files with this prefix' ' in the vendor directory will be processed') parser.add_argument('output', help='The output directory') parser.add_argument( '--id-field', help= 'Name of the field for the compound ID. If not specified the title line is used' ) parser.add_argument('--prefix', required=True, help='Prefix for the compound ID')
output_file.write('\t'.join(output) + '\n') # Enough? num_processed += 1 if limit and num_processed >= limit: break return num_processed if __name__ == '__main__': parser = argparse.ArgumentParser('Vendor Compound Standardiser (ChEMBL)') parser.add_argument( '--input', '-i', help='The ChEMBL chembl_*.sdf.gz file to be processed.') parser.add_argument('--output', '-o', help='The output directory') parser.add_argument('--output-is-prefix', action='store_true', help='Use the output as filename prefix rather than' ' a directory. This is useful in nextflow' ' workflows') parser.add_argument('--limit', '-l', type=int, default=0, help='Limit processing to the first N molecules,' ' process all otherwise') args = parser.parse_args()
# Write the standardised data output = [ smiles, smiles, smiles, '0', '%s%s' % (compound_prefix, line_num) ] output_file.write('\t'.join(output) + '\n') lines_processed += 1 return lines_processed if __name__ == '__main__': parser = argparse.ArgumentParser('Exclusion Standardiser') parser.add_argument('exclusion_file', help='The file of exclusions.' ' A ".gz" of excluded SMILES strings.') parser.add_argument('output', help='The output directory, where' ' the standard file will be written') args = parser.parse_args() # Open the file we'll write the standardised data set to. # A text, tab-separated file. output_filename = os.path.join(args.output, '{}.gz'.format(output_filename)) logger.info('Writing %s...', output_filename) num_processed = 0 with gzip.open(output_filename, 'wt') as output_gzip_file: