def aggregate(config):
    """
    aggregate(config)

    Gather the results of the individual annotation steps and combine them
    into the aggregate dataset. The steps run by this function are given
    below

    1. Download the Argot2.5 results and process them
       (``download_argot2`` then ``process_argot2``).
    2. Convert the mixed-method results to GAF and filter them
       (``mixed2gaf`` then ``filter_mixed``).
    3. Clean duplicate and redundant annotations and aggregate the datasets
       (``clean_duplicate``, ``clean_redundant``, ``aggregate_datasets``).

    Parameters
    ----------
    config : dict
        The config dict generated in the gomap.py script.
    """
    setlogging(config, "aggregate")

    # The pipeline modules are imported here rather than at module level, and
    # each one only right before it is used, so a failing import in a later
    # stage cannot prevent an earlier stage from running.
    from code.pipeline.run_argot2 import process_argot2, download_argot2
    logging.info("Obtaining and aggregating Argot2.5 results")
    download_argot2(config)
    process_argot2(config)

    from code.pipeline.mixed2gaf import mixed2gaf, filter_mixed
    logging.info("Filtering mixed-method GAF")
    mixed2gaf(config)
    filter_mixed(config)

    from code.pipeline.make_aggregate import clean_duplicate, clean_redundant, aggregate_datasets
    logging.info("Cleaning and aggregating GAF files")
    clean_duplicate(config)
    clean_redundant(config)
    aggregate_datasets(config)
def setup(config): setlogging(config, "setup") """ setup(config) This function downloads the **GOMAP-data.tar.gz** directory from CyVerse and extracts the content to the **data** directory. The steps run by this function is given below 1. asdsdsa 2. sadsadsad 3. sadsadsad Parameters ---------- config : dict The config dict generated in the gomap.py script. """ outdir = "data/" cmd = ["irsync", "-rv", cyverse_path, outdir] logging.info("Downloading file from Cyverse using irsync") #The irsync will checksum the files on both ends and dtermine if the download is necessary and will only download if necessary # might take time to check if the files needs to be downloaded print(os.getcwd()) print(" ".join(cmd)) check_output_and_run("outfile", cmd) with open("data/compress_files.txt", "r") as comp_files: counter = 0 for infile in comp_files.readlines(): counter = counter + 1 outfile = outdir + infile.strip() gzfile = outdir + infile.strip() + ".gz" if os.path.exists(gzfile): if os.path.exists(outfile): print(gzfile + " already extracted") else: print("Extracting " + gzfile) with gzip.open(gzfile, "rb") as in_f: with open(outfile, "wb") as out_f: shutil.copyfileobj(in_f, out_f) os.remove(gzfile) else: print(gzfile + " doesn't exist") with open("data/tar_files.txt", "r") as comp_files: for infile in comp_files.readlines(): infile = infile.strip() outfile = outdir + infile.strip() tar_f = outdir + infile.strip() + ".tar.gz" base_dir = os.path.basename(outfile) if os.path.exists(tar_f): if os.path.exists(outfile): print(tar_f + " already extracted") else: print("Extracting " + tar_f) with tarfile.open(tar_f) as tar: tar.extractall("data/") os.remove(tar_f) else: print(tar_f + " doesn't exist")
def init_dirs(config):
    """
    init_dirs(config)

    Create the GOMAP working directory and copy the pipeline's directory
    structure into it.

    The working directory is ``<workdir>/GOMAP-<basename>``, built from
    ``config["input"]``; its path is stored back in
    ``config["input"]["gomap_dir"]``. The directory layout is read from the
    file named by ``config["pipeline"]["dir_struct"]``, materialised with
    ``create_files`` and copied into the working directory with
    ``pyrocopy.copy``. The names of all files already present under the
    working directory are passed as ``excludeFiles``.

    Parameters
    ----------
    config : dict
        The config dict generated in the gomap.py script.

    Returns
    -------
    dict
        The same config dict, with ``config["input"]["gomap_dir"]`` set.
    """
    input_cfg = config["input"]
    gomap_dir = input_cfg["workdir"] + "/GOMAP-" + input_cfg["basename"]
    input_cfg["gomap_dir"] = gomap_dir

    if not os.path.exists(gomap_dir):
        # 0o777 is the same value as the old-style literal 0777, but is valid
        # on both Python 2.6+ and Python 3.
        os.makedirs(gomap_dir, mode=0o777)

    # NOTE(review): the purpose of the 'a' seed entry is not visible here; it
    # is kept so the exclusion list passed to pyrocopy is unchanged.
    excl_files = ['a']
    for _root, _dirs, files in os.walk(gomap_dir):
        excl_files.extend(files)

    with open(config["pipeline"]["dir_struct"]) as tmp_file:
        dir_struct = tmp_file.read()

    with create_files(dir_struct) as workdir:
        results = pyrocopy.copy(workdir,
                                gomap_dir,
                                excludeFiles=excl_files,
                                detailedResults=True)

    # Logging is configured only after the directory tree has been created.
    # NOTE(review): presumably the log file lives inside gomap_dir -- confirm.
    setlogging(config, "init_dirs")
    logging.info("Initializing directory structure")
    logging.info(pformat(results))
    return config
config = merger.merge(pipe_config, user_config) config = init_dirs(config) copy_input(config) conf_out = config["input"]["gomap_dir"]+"/"+config["input"]["basename"]+".all.yml" config["input"]["config_file"] = conf_out with open(conf_out,"w") as out_f: yaml.dump(config,out_f) ''' Depending the step selected by the user we are going to run the relevant part of GO-MAP ''' if main_args.step == "seqsim": print("Running Sequence-similarity based Annotation Step") setlogging(config,"seqsim") logging.info("Running Sequence-similarity based Annotation Step") run_seqsim(config) elif main_args.step == "domain": print("Running Domain Based Annotation Step") setlogging(config,"domain") logging.info("Running Domain Based Annotation Step") run_domain(config) elif main_args.step == "mixmeth-blast": print("Running uniprot blast for mixed-methods") setlogging(config,"mixmeth-preproc") logging.info("Running uniprot blast for mixed-methods") run_mixmeth_blast(config) elif main_args.step == "mixmeth-preproc": print("Running preprocessing step for mixed-methods") setlogging(config,"mixmeth-preproc")