def main():
    """Build a transcript datasource in a temp dir and copy it to the output dir.

    Steps, in order:

    1. Configure logging and parse the command-line options.
    2. Create a temporary directory containing a ``<genome_build>/``
       sub-directory in which the datasource is assembled.
    3. Write a base config file (``<name>.config.tmp``), re-read it, add the
       ``transcript_filter`` option to its ``general`` section and write the
       result to ``<name>.config``.
    4. Build the indices from the GTF and FASTA files.
    5. Create the datasource md5 file.
    6. Copy the whole temporary tree to ``args.output_dir``.

    Any ``Exception`` raised during steps 2-6 is logged (with traceback) and
    NOT re-raised, so callers never see it.  The temporary directory is
    removed in all cases, including interruption by ``KeyboardInterrupt``.
    """
    import traceback

    setup_logging()
    logger = logging.getLogger(__name__)

    args = parseOptions()
    gtf_files = args.gtf_files.split(",")
    fasta_files = args.fasta_files.split(",")
    output_dir = args.output_dir
    genome_build = args.genome_build
    name = args.name
    ver = args.version
    tx_filter = args.filter
    protein_map_file = args.protein_map_file

    # Everything is assembled in a scratch directory and copied out at the end.
    tmpDir = tempfile.mkdtemp(prefix="onco_ensembl_ds_")
    try:
        logger.info("Creating tmp dir (" + tmpDir + ") ....")
        # The trailing slash is kept on purpose: this string is concatenated
        # with file names below and handed to helpers defined elsewhere.
        ds_build_dir = tmpDir + "/" + genome_build + "/"
        os.mkdir(ds_build_dir)

        # The check is a heuristic on the raw --gtf_files string only.
        if "gencode" not in args.gtf_files.lower() and tx_filter == "basic":
            logger.warning("basic filter requested for (apparently) a non-gencode set of GTFs. If this is an ENSEMBL run (not GENCODE), please specify dummy, using --filter.")

        logger.info("Creating config file...")
        config_filename = os.path.join(ds_build_dir, name + ".config")
        tmp_config_filename = config_filename + ".tmp"
        logger.info("config file being written to: " + os.path.abspath(config_filename))

        config_file_creator = GenericTsvDatasourceCreator()
        idx_cols = DatasourceInstallUtils.indexCols("dummy_option", "dummy_values")
        config_file_creator._createConfigFile(configFilename=tmp_config_filename,
                                              baseDSFile=os.path.basename(gtf_files[0]),
                                              ds_type="ensembl", ds_version=ver,
                                              ds_name=name, indexCols=idx_cols)

        # Re-read the generated config and add the transcript filter to it.
        config_parser = SafeConfigParser()
        with open(tmp_config_filename, 'r') as fp:
            config_parser.readfp(fp)
        config_parser.set("general", "transcript_filter", tx_filter)

        # Write the updated config file under its final name.
        with open(config_filename, 'w') as fp:
            config_parser.write(fp)

        logger.info("Starting index construction (temp location: " + ds_build_dir + ") ...")
        factory = GenomeBuildFactory()
        factory.construct_ensembl_indices(gtf_files, fasta_files,
                                          ds_build_dir + os.path.basename(gtf_files[0]),
                                          protein_id_mapping_file=protein_map_file)

        logger.info("Creating datasource md5...")
        DatasourceInstallUtils.create_datasource_md5_file(ds_build_dir)

        logger.info("Copying created datasource from temp directory to final location (" + output_dir + ")...")
        shutil.copytree(symlinks=True, src=tmpDir, dst=output_dir)
    except Exception as e:
        # Top-level boundary: report the failure but do not propagate it.
        logger.fatal(repr(e) + " " + traceback.format_exc())
        logger.info("If you are getting an error such as: (KeyError: 'ENST00000474204.1'), then you may be out of disk space in /tmp/.")
    else:
        logger.info("Done...")
    finally:
        # Remove the tempdir whether or not the build succeeded.
        logger.info("Removing ..." + tmpDir + '/')
        shutil.rmtree(tmpDir)