Ejemplo n.º 1
0
def aggregate(config):
    """
    aggregate(config)

    Run the aggregation stage: obtain and process the Argot2.5 results,
    convert and filter the mixed-method annotations, then clean and
    aggregate the GAF files.

    Parameters
    ----------
    config : dict
        The config dict generated in the gomap.py script.
    """
    setlogging(config, "aggregate")

    # The pipeline modules are imported right before use (kept from the
    # original) so that nothing is loaded until this stage actually runs.
    from code.pipeline.run_argot2 import process_argot2, download_argot2
    logging.info("Obtaining and aggregating Argot2.5 results")
    download_argot2(config)
    process_argot2(config)

    from code.pipeline.mixed2gaf import mixed2gaf, filter_mixed
    logging.info("Filtering mixed-method GAF")
    mixed2gaf(config)
    filter_mixed(config)

    from code.pipeline.make_aggregate import clean_duplicate, clean_redundant, aggregate_datasets
    logging.info("Cleaning and aggregating GAF files")
    # Order matters here only in so far as the original ran them this way:
    # duplicates first, then redundant entries, then the final aggregation.
    clean_duplicate(config)
    clean_redundant(config)
    aggregate_datasets(config)
Ejemplo n.º 2
0
def setup(config):
    setlogging(config, "setup")
    """
    setup(config)

    This function downloads the **GOMAP-data.tar.gz** directory from CyVerse and extracts the content to the **data** directory. The steps run by this function is given below

    1. asdsdsa
    2. sadsadsad
    3. sadsadsad

    Parameters
    ----------
    config : dict
        The config dict generated in the gomap.py script.
    """

    outdir = "data/"
    cmd = ["irsync", "-rv", cyverse_path, outdir]
    logging.info("Downloading file from Cyverse using irsync")
    #The irsync will checksum the files on both ends and dtermine if the download is necessary and will only download if necessary
    # might take time to check if the files needs to be downloaded
    print(os.getcwd())
    print(" ".join(cmd))
    check_output_and_run("outfile", cmd)

    with open("data/compress_files.txt", "r") as comp_files:
        counter = 0
        for infile in comp_files.readlines():
            counter = counter + 1
            outfile = outdir + infile.strip()
            gzfile = outdir + infile.strip() + ".gz"
            if os.path.exists(gzfile):
                if os.path.exists(outfile):
                    print(gzfile + " already extracted")
                else:
                    print("Extracting " + gzfile)
                    with gzip.open(gzfile, "rb") as in_f:
                        with open(outfile, "wb") as out_f:
                            shutil.copyfileobj(in_f, out_f)
                    os.remove(gzfile)
            else:
                print(gzfile + " doesn't exist")

    with open("data/tar_files.txt", "r") as comp_files:
        for infile in comp_files.readlines():
            infile = infile.strip()
            outfile = outdir + infile.strip()
            tar_f = outdir + infile.strip() + ".tar.gz"
            base_dir = os.path.basename(outfile)
            if os.path.exists(tar_f):
                if os.path.exists(outfile):
                    print(tar_f + " already extracted")
                else:
                    print("Extracting " + tar_f)
                    with tarfile.open(tar_f) as tar:
                        tar.extractall("data/")
                    os.remove(tar_f)
            else:
                print(tar_f + " doesn't exist")
Ejemplo n.º 3
0
def init_dirs(config):
    """
    init_dirs(config)

    Create the GOMAP working directory and copy the directory structure
    described by the ``dir_struct`` file into it.

    The working directory is ``<workdir>/GOMAP-<basename>`` and its path is
    stored in ``config["input"]["gomap_dir"]``. The names of all files that
    already exist under that directory are passed to ``pyrocopy.copy`` as
    ``excludeFiles`` (presumably so existing files are not overwritten --
    confirm against the pyrocopy documentation).

    Parameters
    ----------
    config : dict
        The config dict generated in the gomap.py script. Reads
        ``config["input"]["workdir"]``, ``config["input"]["basename"]`` and
        ``config["pipeline"]["dir_struct"]``.

    Returns
    -------
    dict
        The same config dict, with ``config["input"]["gomap_dir"]`` set.
    """
    gomap_dir = config["input"]["workdir"] + "/GOMAP-" + config["input"][
        "basename"]
    config["input"]["gomap_dir"] = gomap_dir
    if not os.path.exists(gomap_dir):
        # 0o777 is the octal spelling accepted by Python 2.6+ and Python 3;
        # the bare 0777 form is a syntax error on Python 3.
        os.makedirs(gomap_dir, mode=0o777)

    # NOTE(review): the list is seeded with 'a' as in the original; the
    # reason is not visible here (possibly to avoid passing an empty
    # excludeFiles list) -- confirm before removing.
    excl_files = ['a']
    for _root, _dirs, files in os.walk(gomap_dir):
        excl_files.extend(files)

    with open(config["pipeline"]["dir_struct"]) as tmp_file:
        dir_struct = tmp_file.read()

    with create_files(dir_struct) as workdir:
        results = pyrocopy.copy(workdir,
                                gomap_dir,
                                excludeFiles=excl_files,
                                detailedResults=True)
        # Logging is configured only after the copy, as in the original
        # (presumably because the log location lives inside the directory
        # structure that was just created -- confirm).
        setlogging(config, "init_dirs")
        logging.info("Initializing directory structure")
        logging.info(pformat(results))
    return config
Ejemplo n.º 4
0
# Combine the pipeline configuration with the user configuration into one
# dict (precedence is decided by `merger`, which is defined outside this view).
config = merger.merge(pipe_config, user_config)
# init_dirs creates the GOMAP working directory and stores its path in
# config["input"]["gomap_dir"], which is used below.
config = init_dirs(config)
copy_input(config)

# Write the full merged configuration to <gomap_dir>/<basename>.all.yml.
# The path is recorded in the config before dumping, so the written file
# contains its own location under input.config_file.
conf_out = config["input"]["gomap_dir"]+"/"+config["input"]["basename"]+".all.yml"
config["input"]["config_file"] = conf_out
with open(conf_out,"w") as out_f:
	yaml.dump(config,out_f)

'''
Depending the step selected by the user we are going to run the relevant part of GO-MAP
'''

# Each branch announces the step on stdout, configures logging under a
# step-specific name, logs the same message and then runs the step.
if main_args.step == "seqsim":
    print("Running Sequence-similarity based Annotation Step")
    setlogging(config,"seqsim")
    logging.info("Running Sequence-similarity based Annotation Step")
    run_seqsim(config)
elif main_args.step == "domain":
    print("Running Domain Based Annotation Step")
    setlogging(config,"domain")
    logging.info("Running Domain Based Annotation Step")
    run_domain(config)
elif main_args.step == "mixmeth-blast":
    print("Running uniprot blast for mixed-methods")
    # NOTE(review): this branch handles "mixmeth-blast" but sets up logging
    # under "mixmeth-preproc", the same name the next branch uses. This
    # looks like a copy-paste slip -- confirm whether sharing the log name
    # is intentional.
    setlogging(config,"mixmeth-preproc")
    logging.info("Running uniprot blast for mixed-methods")
    run_mixmeth_blast(config)
elif main_args.step == "mixmeth-preproc":
    print("Running preprocessing step for mixed-methods")
    setlogging(config,"mixmeth-preproc")