Esempio n. 1
0
def filter_and_write(mol2_files, ids, output_dir, includelist_filter, verbose):
    """Copy molecules from multi-mol2 files into `output_dir`, filtered by ID.

    For every path in `mol2_files`, an output file with the same base name
    is created in `output_dir`. Output paths ending in '.gz' are written via
    `gzip.open` in binary mode (molecule IDs are decoded as UTF-8 before the
    membership test); all other paths are written as plain text.

    Parameters
    ----------
    mol2_files : iterable of str
        Paths of the multi-mol2 files to process.
    ids : iterable of str
        Molecule IDs used for filtering.
    output_dir : str
        Directory that receives the filtered files; created if missing.
    includelist_filter : bool
        If true, keep only molecules whose ID is in `ids`; otherwise keep
        only molecules whose ID is not in `ids`.
    verbose : bool
        If true, write the file name, the number of scanned molecules and
        the throughput to stdout.

    """
    # Build the lookup once so each membership test is O(1) instead of a
    # linear scan per molecule when `ids` is a list.
    id_lookup = frozenset(ids)
    keep_if_listed = bool(includelist_filter)

    for mol2_file in mol2_files:
        if verbose:
            sys.stdout.write('Processing %s' % os.path.basename(mol2_file))
            sys.stdout.flush()

        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(output_dir, exist_ok=True)

        mol2_outpath = os.path.join(output_dir, os.path.basename(mol2_file))

        is_gzipped = mol2_outpath.endswith('.gz')
        if is_gzipped:
            write_mode, open_file, joiner = 'wb', gzip.open, b''
        else:
            write_mode, open_file, joiner = 'w', open, ''

        # Counted explicitly (rather than reusing a loop index) so the
        # summary below also works when the input yields no molecules.
        n_molecules = 0

        with open_file(mol2_outpath, write_mode) as f:
            start = time.time()

            for mol2 in split_multimol2(mol2_file):
                n_molecules += 1
                mol2_id = mol2[0]
                if is_gzipped:
                    # Entries from gzipped inputs are handled as bytes.
                    mol2_id = mol2_id.decode('utf-8')

                if (mol2_id in id_lookup) == keep_if_listed:
                    f.write(joiner.join(mol2[1]))

            if verbose:
                elapsed = time.time() - start
                # A coarse clock can report zero elapsed time; report a
                # rate of 0 instead of raising ZeroDivisionError.
                rate = n_molecules / elapsed if elapsed > 0 else 0
                sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
                                 (n_molecules, rate))
                sys.stdout.flush()
Esempio n. 2
0
def test_split_multimol2_gz():
    """Check the IDs read from the gzip-compressed 40-molecule fixture."""
    gz_path = os.path.join(this_dir, 'data', '40_mol2_files.mol2.gz')
    codes = [entry[0] for entry in split_multimol2(gz_path)]

    # IDs coming from the gzipped file are bytes, hence the decode().
    second_code = codes[1].decode()
    assert second_code == 'ZINC04084113'
    assert len(codes) == 40
def run_rocs(source_file, target_file, n_processes, settings):
    """Run the external executable on `source_file` against the query file.

    Parameters
    ----------
    source_file : str
        Path passed to the executable as '-dbase'.
    target_file : str
        Path from which the '-prefix' argument is derived.
    n_processes : int
        Value passed (as a string) to '-mpi_np'.
    settings : str or None
        Extra command-line arguments, separated by whitespace, that are
        appended to the command. Ignored when empty or None.

    """
    # Text before the last '.mol2' (any earlier '.mol2' occurrences are
    # dropped as well); empty when `target_file` contains no '.mol2'.
    prefix = ''.join(target_file.split('.mol2')[:-1])

    sys.stdout.write('Processing %s\n' % os.path.basename(source_file))
    sys.stdout.flush()

    # 'true' as soon as a second entry is seen in the query file. Defaulting
    # to 'false' up front also covers a query file that yields nothing,
    # which previously raised NameError on the unbound loop index.
    mcquery = 'false'
    for idx, _ in enumerate(split_multimol2(QUERY_FILE)):
        if idx >= 1:
            mcquery = 'true'
            break

    cmd = [EXECUTABLE,
           '-ref', QUERY_FILE,
           '-dbase', source_file,
           '-outputquery', 'false',
           '-prefix', prefix,
           '-mcquery', mcquery,
           '-mpi_np', str(n_processes),
           '-oformat', 'mol2']

    if settings:
        # str.split() without arguments already strips surrounding
        # whitespace and never yields empty strings.
        cmd.extend(settings.split())

    # The child's stdout was never read; with stdout=PIPE the child blocks
    # once the OS pipe buffer fills up. DEVNULL discards the output without
    # that deadlock risk.
    subprocess.call(cmd, stdout=subprocess.DEVNULL)
Esempio n. 4
0
def test_read_mol2_from_list():
    """Load the first fixture molecule via `read_mol2_from_list`."""
    fixture_path = os.path.join(this_dir, 'data', '40_mol2_files.mol2')
    first_entry = next(split_multimol2(fixture_path))

    reader = PandasMol2()
    pdmol = reader.read_mol2_from_list(mol2_lines=first_entry[1],
                                       mol2_code=first_entry[0])

    assert pdmol.df.shape == (65, 9)
    assert pdmol.code == 'ZINC38611810'
def check_query(query_path):
    """Validate that all entries in the query file share one molecule ID.

    Raises
    ------
    ValueError
        If the file yields more than one entry and the entries carry more
        than one distinct ID.

    """
    labels = []
    for entry in split_multimol2(query_path):
        labels.append(entry[0])

    n_ids = len(labels)
    if n_ids <= 1:
        # Zero or one entry: nothing to compare.
        return

    n_unique_ids = len(set(labels))
    if n_unique_ids <= 1:
        return

    raise ValueError('Please Make sure that you only submit one'
                     ' molecule or, if you submit a multi-conformer'
                     ' query, that conformers of the molecule'
                     ' have all the same molecule ID labels.'
                     ' Found %d molecules and %d unique labels' %
                     (n_ids, n_unique_ids))
Esempio n. 6
0
def mol2_to_idfile(mol2_files, id_file_path, verbose=0):
    """Write the ID of every molecule in `mol2_files` to a text file.

    Parameters
    ----------
    mol2_files : iterable of str
        Paths of the multi-mol2 files to scan.
    id_file_path : str
        Path of the output file; one molecule ID is written per line.
    verbose : int or bool (default: 0)
        If truthy, write the file name, the number of scanned molecules and
        the throughput to stdout.

    """
    with open(id_file_path, 'w') as f:
        for mol2_file in mol2_files:

            if verbose:
                sys.stdout.write('Processing %s' % os.path.basename(mol2_file))
                sys.stdout.flush()

            start = time.time()

            # Counted explicitly so the summary below also works when the
            # input yields no molecules (the loop index would be unbound).
            n_molecules = 0
            for mol2 in split_multimol2(mol2_file):
                n_molecules += 1
                mol2_id = mol2[0]
                # IDs can arrive as bytes (e.g. from gzipped inputs); the
                # output file is opened in text mode, so decode them first.
                if isinstance(mol2_id, bytes):
                    mol2_id = mol2_id.decode('utf-8')
                f.write(mol2_id + '\n')

            if verbose:
                elapsed = time.time() - start
                # A coarse clock can report zero elapsed time; report a
                # rate of 0 instead of raising ZeroDivisionError.
                rate = n_molecules / elapsed if elapsed > 0 else 0
                sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
                                 (n_molecules, rate))
                sys.stdout.flush()
Esempio n. 7
0
    def read_mol2(self, path, columns=None):
        """Load the first entry yielded by `split_multimol2(path)`.

        The entry's lines and code are handed to `self._load_mol2` together
        with `columns`, and `path` is stored in `self.mol2_path`.

        Returns
        -------
        self

        """
        first_entry = next(split_multimol2(path))
        mol2_code, mol2_lines = first_entry

        self._load_mol2(mol2_lines, mol2_code, columns)
        self.mol2_path = path
        return self
Esempio n. 8
0
 def run_code():
     """Request the first entry of a file whose name ends in '.pdb'."""
     # NOTE(review): presumably exercises the unsupported-extension error
     # path of split_multimol2 — confirm against the enclosing test.
     entries = split_multimol2('40_mol2_files.pdb')
     next(entries)