def mp_helper_special_mode(one_fragment, fragment_file_chunks, options, chunk_size=100):
    iteration_depth = options.iteration_depth
    repeats = options.repeats
    results = list()
    # one_fragment is a list containing a single fragment file
    fragments = one_fragment

    def mp_callback(res):
        # collect merge products and newly created fragment files from the workers
        r, f = res
        if r:
            results.append(r)
        if f:
            fragments.append(f)

    for repeat_number in range(repeats):
        logging.info('Run %s of %s. You can change the repeat count with --repeats.' % (repeat_number + 1, repeats))
        for counter, fragment_file_two in enumerate(fragment_file_chunks):
            # merge the accumulated fragments into one deduplicated temporary file
            combined_fragments = tempfile.NamedTemporaryFile(dir=temp_dir, prefix='tmp_fragments_', delete=False)
            unique_files(fragments, combined_fragments, '1')
            #logging.debug('fragments (%s)' % ( open(combined_fragments.name).read().strip() ))
            if counter > 0:
                # remove the intermediate files from the previous iteration, but keep the original seed
                clean(fragments)
            fragments = [combined_fragments.name]
            #logging.debug('Linecount: %s (%s) - loop-counter (%s)' % ( CountLines( combined_fragments.name ), combined_fragments.name, counter))
            splitted_files = split_smi_library(combined_fragments.name, chunk_size)
            logging.debug('Fragments to process: %s (%s); Files to process: %s (%s)' % (
                CountLines(combined_fragments.name), combined_fragments.name, len(splitted_files), counter))
            pool = multiprocessing.Pool(options.processors)
            for fragment_file in splitted_files:
                pool.apply_async(mp_helper, args=(fragment_file, fragment_file_two, iteration_depth), callback=mp_callback)
            pool.close()
            pool.join()
            clean(splitted_files)
    if results:
        # deduplicate all collected results into a single output file
        combined_results = tempfile.NamedTemporaryFile(dir=temp_dir, prefix='tmp_results_', delete=False)
        unique_files(results, combined_results, '2')
        clean(results)
        return combined_results.name
    else:
        return False
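# Hypothetical driver sketch (not part of the original source): it only
# illustrates how mp_helper_special_mode is expected to be called. The options
# object must provide iteration_depth, repeats and processors; the file names
# below are placeholders, and split_smi_library is the helper used elsewhere
# in this module.
def _example_special_mode(options):
    # a single seed fragment file, wrapped in a list as the function expects
    one_fragment = ['seed_fragment.smi']
    # pre-split the full fragment library into chunk files for the inner loop
    fragment_file_chunks = split_smi_library('fragment_library.smi', 100)
    result_file = mp_helper_special_mode(one_fragment, fragment_file_chunks, options)
    if result_file:
        logging.info('Merged fragments written to %s.' % result_file)
    else:
        logging.info('No merge products were generated.')
    return result_file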
def run(compound_path, procs, schema, conn_string, filetype):
    clean_up_files = False
    if os.path.isdir(compound_path):
        # a directory was given: collect every file beneath it
        paths = []
        for root, dirs, files in os.walk(compound_path):
            for compound_file in files:
                path = os.path.join(root, compound_file)
                paths.append((path, schema, conn_string, filetype))
        paths.sort()
    else:
        if filetype in ['smi', 'inchi']:
            # split the single input file so each worker gets roughly the same share
            clean_up_files = True
            compound_count = cheminfolib.CountLines(compound_path)
            sys.stdout.write('Splitting input file (%s) with %s molecules into files of %s molecules each.\n'
                             % (compound_path, compound_count, int(compound_count / procs)))
            paths = cheminfolib.split_smi_library(compound_path, structures_in_one_file=int(compound_count / procs))
            paths = [(path, schema, conn_string, filetype) for path in paths]
        if filetype == 'sdf':
            paths = [compound_path]
    pool = Pool(processes=procs)
    sys.stdout.write('Process pool initialized with %s processes.\n' % procs)
    result = pool.map_async(start_parser, paths)
    result.get()
    if clean_up_files:
        # remove the temporary split files created above
        for path in paths:
            os.remove(path[0])
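# Hypothetical call sketch (not part of the original module): it shows the
# arguments run() expects. All values are placeholders; filetype 'smi' takes
# the splitting branch above, while 'sdf' passes the file through unsplit.
def _example_run():
    run(compound_path='compounds.smi',                 # input file or directory
        procs=4,                                       # number of worker processes
        schema='public',                               # hypothetical database schema
        conn_string='dbname=compounds user=postgres',  # hypothetical connection string
        filetype='smi')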
log_level = logging.INFO
logging.basicConfig(level=log_level)
test(options)
temp_dir = tempfile.mkdtemp(dir=options.tempdir)
unique_compounds = set()
multiple_merge_compounds = set()
trash_list = list()
result_files = list()
unique_input, unique_input_non_fragments = filter_input_files(options.input_path)
# adding the non-fragments to the results
# result_files.append( unique_input_non_fragments.name )
splitted_files = split_smi_library(unique_input.name, 1)
trash_list.extend(splitted_files)

# If we have two input files, merge one against the other
if options.second_input:
    logging.info('Merging file %s against file %s.' % (options.input_path, options.second_input))
    unique_input2, unique_input_non_fragments2 = filter_input_files(options.second_input)
    # adding the non-fragments to the results
    # result_files.append( unique_input_non_fragments2.name )
    splitted_files2 = split_smi_library(unique_input2.name, 1)
    trash_list.extend(splitted_files2)
    for counter, fragment_file_one in enumerate(splitted_files):