Esempio n. 1
0
            output_file.write('\t'.join(output) + '\n')

            # Enough?
            num_processed += 1
            if limit and num_processed >= limit:
                break

    return num_processed


if __name__ == '__main__':

    parser = argparse.ArgumentParser('Vendor Compound Standardiser (Enamine)')
    parser.add_argument('vendor_dir',
                        help='The Enamine vendor directory,'
                        ' containing the ".gz" files to be processed.')
    parser.add_argument('vendor_prefix',
                        help='The Enamine vendor file prefix,'
                        ' i.e. "June2018". Only files with this prefix'
                        ' in the vendor directory will be processed')
    parser.add_argument('output', help='The output directory')
    parser.add_argument('--output-is-prefix',
                        action='store_true',
                        help='Use the output as filename prefix rather than'
                        ' a directory. This is useful in nextflow'
                        ' workflows')
    parser.add_argument('-l',
                        '--limit',
                        type=int,
                        default=0,
            num_vendor_molecule_failures += 1
            traceback.print_exc()

        # Enough?
        num_processed += 1
        if limit and num_processed >= limit:
            break

    return num_processed


if __name__ == '__main__':

    # Command-line interface for the SDF vendor standardiser. Only the
    # argument declarations are visible in this excerpt.
    # NOTE(review): the title string is passed positionally, which argparse
    # treats as `prog` (the name shown in usage output), not `description`.
    parser = argparse.ArgumentParser('Vendor Compound Standardiser (SDF)')
    # Positional 1: the directory holding the vendor's ".gz" input files.
    parser.add_argument('vendor_dir',
                        help='The SDF vendor directory,'
                        ' containing the ".gz" files to be processed.')
    # Positional 2: filename prefix used to select which files in
    # vendor_dir are processed.
    parser.add_argument('vendor_prefix',
                        help='The SDF vendor file prefix,'
                        ' i.e. "iis_smiles". Only files with this prefix'
                        ' in the vendor directory will be processed')

    # Positional 3: where the results are written.
    parser.add_argument('output', help='The output directory')
    # Optional: names the SDF field carrying the compound ID. Per the help
    # text, the molecule's title line is used when this is omitted.
    parser.add_argument(
        '--id-field',
        help=
        'Name of the field for the compound ID. If not specified the title line is used'
    )
    # Mandatory despite being an option-style flag (required=True): the
    # prefix for each compound ID.
    parser.add_argument('--prefix',
                        required=True,
                        help='Prefix for the compound ID')
            output_file.write('\t'.join(output) + '\n')

            # Enough?
            num_processed += 1
            if limit and num_processed >= limit:
                break

    return num_processed


if __name__ == '__main__':

    # Command-line interface for the ChEMBL standardiser.
    # NOTE(review): the title string is passed positionally, which argparse
    # treats as `prog` (the name shown in usage output), not `description`.
    parser = argparse.ArgumentParser('Vendor Compound Standardiser (ChEMBL)')
    # The input file and output location are option-style flags here.
    # NOTE(review): neither is marked required=True, so argparse accepts a
    # command line that omits them - confirm the code that follows copes
    # with a missing value.
    parser.add_argument(
        '--input',
        '-i',
        help='The ChEMBL chembl_*.sdf.gz file to be processed.')
    parser.add_argument('--output', '-o', help='The output directory')
    # Boolean switch (False unless given): treat `output` as a filename
    # prefix instead of a directory.
    parser.add_argument('--output-is-prefix',
                        action='store_true',
                        help='Use the output as filename prefix rather than'
                        ' a directory. This is useful in nextflow'
                        ' workflows')
    # Integer cap on the number of molecules processed; the default of 0
    # means "no limit" according to the help text.
    parser.add_argument('--limit',
                        '-l',
                        type=int,
                        default=0,
                        help='Limit processing to the first N molecules,'
                        ' process all otherwise')

    # Parse sys.argv; the resulting namespace is bound to `args`.
    args = parser.parse_args()
            # Write the standardised data
            output = [
                smiles, smiles, smiles, '0',
                '%s%s' % (compound_prefix, line_num)
            ]
            output_file.write('\t'.join(output) + '\n')
            lines_processed += 1

    return lines_processed


if __name__ == '__main__':

    parser = argparse.ArgumentParser('Exclusion Standardiser')
    parser.add_argument('exclusion_file',
                        help='The file of exclusions.'
                        ' A ".gz" of excluded SMILES strings.')
    parser.add_argument('output',
                        help='The output directory, where'
                        ' the standard file will be written')

    args = parser.parse_args()

    # Open the file we'll write the standardised data set to.
    # A text, tab-separated file.
    output_filename = os.path.join(args.output,
                                   '{}.gz'.format(output_filename))
    logger.info('Writing %s...', output_filename)
    num_processed = 0
    with gzip.open(output_filename, 'wt') as output_gzip_file: