def _createDatasourcesMulticore(numProcesses, datasourceTuples):
    """Create the datasource list, using multiple processes where possible.

    Datasources whose config declares a ``[general] type`` of ``gene_tsv``,
    ``gp_tsv``, ``gpp_tsv`` or ``transcript_tsv`` are queued and initialized
    through a process pool.  Every other type is created serially in the
    calling process.

    :param numProcesses: number of worker processes handed to the pool.
    :param datasourceTuples: sequence of datasource tuples.  Element 0 of each
        tuple is the config file path (presumably the tuples are
        ``(configFilename, leafDir)`` -- confirm against the caller).
    :return: list of datasources.  Serially created datasources come first
        (in input order), followed by the pool-created ones (in queue order).
        An empty list is returned when there is nothing to initialize.
    """
    logger = logging.getLogger(__name__)

    if not datasourceTuples:
        logger.info("No datasources to initialize")
        return []

    parallelTypes = ("gene_tsv", "gp_tsv", "gpp_tsv", "transcript_tsv")

    logger.info("Creating pool")
    # TODO: Create a default pool like Poco in C++.  That way multiple classes can share the same pool.
    pool = LoggingPool(processes=numProcesses)
    logger.info("Pool created")

    result = []
    try:
        # Split the datasources: parallelQueue holds the ones that can be
        # initialized in the pool; everything else is built right away.
        parallelQueue = []
        for dsTuple in datasourceTuples:
            configParser = ConfigUtils.createConfigParser(dsTuple[0])
            if configParser.get("general", "type") in parallelTypes:
                parallelQueue.append(dsTuple)
            else:
                result.append(DatasourceFactory.createDatasourceGivenTuple(dsTuple))

        parallelResult = []
        if parallelQueue:
            # NOTE(review): map() hands each queued tuple to createDatasource
            # as ONE positional argument, so that callable must accept a
            # single tuple -- confirm its signature.
            parallelResult = pool.map(createDatasource, parallelQueue)
        result.extend(parallelResult)
        logger.info("Mapping complete: %d datasources created in multiprocess", len(parallelResult))
    finally:
        # Always release the worker processes, including when config parsing,
        # serial creation or the map call raises.
        pool.close()
        pool.join()

    return result
_LEAF_DIR_NOT_GIVEN = object()  # sentinel: tells "leafDir omitted" apart from an explicit None


def createDatasource(configFilename, leafDir=_LEAF_DIR_NOT_GIVEN):
    """Create a datasource from a config file and its leaf directory.

    Two call forms are accepted:

    * ``createDatasource(configFilename, leafDir)`` -- the original form.
    * ``createDatasource((configFilename, leafDir))`` -- a single 2-item
      tuple/list.  ``Pool.map`` passes exactly one item per call, so this
      form lets the function be used directly as a map callable.

    :param configFilename: path of the datasource config file, or a 2-item
        tuple/list ``(configFilename, leafDir)`` when ``leafDir`` is omitted.
    :param leafDir: passed through unchanged to
        ``DatasourceFactory.createDatasourceFromConfigParser``.
    :return: whatever ``DatasourceFactory.createDatasourceFromConfigParser``
        returns for the parsed config.
    :raises TypeError: if ``leafDir`` is omitted and ``configFilename`` is not
        a 2-item tuple/list.
    """
    if leafDir is _LEAF_DIR_NOT_GIVEN:
        # Single-argument call: only a (configFilename, leafDir) pair is valid.
        if not isinstance(configFilename, (tuple, list)) or len(configFilename) != 2:
            raise TypeError(
                "createDatasource() takes (configFilename, leafDir) or a single "
                "(configFilename, leafDir) tuple")
        configFilename, leafDir = configFilename

    configParser = ConfigUtils.createConfigParser(configFilename)
    return DatasourceFactory.createDatasourceFromConfigParser(configParser, leafDir)