Exemple #1
0
def fimo(motif,
         bg_file=None,
         fasta_file=None,
         tempdir=None,
         motifdatabase=None,
         thresh=None,
         largewindow=None):
    '''Run fimo on a fasta file for a single motif of a motif database and
        return the motif name followed by the parsed hit distances.

    Parameters
    ----------
    motif : string
        the name of a motif that matches a motif within motifdatabase

    bg_file : string or None
        full path to a markov background model; when None, fimo is run
        without the --bgfile option

    fasta_file : string
        full path to a fasta file that fimo will perform motif scanning on

    tempdir : string
        unused; fimo's output is captured from stdout rather than written to
        a file. Kept so existing callers can still pass it.

    motifdatabase : string
        full path to a motif database file in meme format

    thresh : number or string
        value handed to fimo's --thresh option (converted with str())

    largewindow : object
        forwarded unchanged to fimo_parse_stdout

    Returns
    -------
    list
        [motif] followed by the distances returned by fimo_parse_stdout

    Raises
    ------
    exceptions.SubprocessError
        if fimo exits with a non-zero status; the message is fimo's stderr
    '''
    command = ["fimo", "--skip-matched-sequence", "--verbosity", "1",
               "--thresh", str(thresh)]
    if bg_file is not None:
        command += ["--bgfile", bg_file]
    command += ["--motif", motif, motifdatabase, fasta_file]

    try:
        fimo_stdout = subprocess.check_output(
            command, stderr=subprocess.PIPE).decode('UTF-8')
    except subprocess.CalledProcessError as e:
        raise exceptions.SubprocessError(e.stderr.decode()) from e

    names = fasta_names(fastafile=fasta_file)
    distances = fimo_parse_stdout(fimo_stdout=fimo_stdout,
                                  largewindow=largewindow,
                                  names=names)

    # Drop the reference to the raw fimo output as soon as it has been parsed.
    del fimo_stdout

    return [motif] + distances
Exemple #2
0
def fasta_markov(tempdir=None, fastafile=None, order=None):
    '''Run meme's fasta-get-markov to generate a background markov file
        (for use with fimo) from a fasta file.

    Parameters
    ----------
    tempdir : pathlib.Path
        temp directory in the output directory; the model is written to
        tempdir / "markov_background.txt"

    fastafile : string
        full path to fasta file that will be used to generate the markov
        background model file

    order : string or int
        the order of the markov model, handed to fasta-get-markov's -m
        option (converted with str())

    Returns
    -------
    markov_background : pathlib.Path
        path to the generated background model file

    Raises
    ------
    exceptions.SubprocessError
        if fasta-get-markov exits with a non-zero status; the message is the
        command's stderr
    '''
    markov_background = tempdir / "markov_background.txt"
    with open(markov_background, 'w') as output:
        try:
            # check=True is required: without it subprocess.run never raises
            # CalledProcessError and a failed run would go unnoticed.
            subprocess.run(["fasta-get-markov", "-m", str(order), fastafile],
                           stdout=output,
                           stderr=subprocess.PIPE,
                           check=True)
        except subprocess.CalledProcessError as e:
            raise exceptions.SubprocessError(e.stderr.decode()) from e

    return markov_background
Exemple #3
0
def create_directories(srcdirectory=None):
    '''Create the output directories and, when requested, submit the run to
        sbatch.

    The behaviour is selected by config.vars['SBATCH']:

    * equal to False: create the output directories, write the rerun script
      and inputs.txt, and set config.vars['JOBID'] to 0.
    * 'SUBMITTED' (internal flag): do not create directories; read
      config.vars['JOBID'] from TEMPDIR / 'jobid.txt'.
    * anything else (--sbatch specified): create the directories, write the
      rerun script and inputs.txt, submit srcdirectory / 'main.sbatch' with
      sbatch, store the last token of sbatch's stdout in
      TEMPDIR / 'jobid.txt', print how to monitor the job and exit.

    Parameters
    ----------
    srcdirectory : pathlib.Path
        directory containing main.sbatch; only used when a job is submitted

    Raises
    ------
    exceptions.SubprocessError
        if sbatch exits with a non-zero status; the message is its stderr
    SystemExit
        always, after a job has been submitted successfully
    '''
    from TFEA import config
    if config.vars['SBATCH'] == False:  #No sbatch flag
        make_out_directories(create=True)
        write_rerun(args=sys.argv, outputdir=config.vars['OUTPUT'])
        write_vars(config_vars=config.vars,
                   outputfile=config.vars['OUTPUT'] / 'inputs.txt')
        config.vars['JOBID'] = 0
    elif str(config.vars['SBATCH']) == 'SUBMITTED':  #Internal flag
        make_out_directories(create=False)
        config.vars['JOBID'] = (config.vars['TEMPDIR'] /
                                'jobid.txt').read_text().strip('\n')
    else:  #--sbatch specified
        make_out_directories(create=True)
        write_rerun(args=sys.argv, outputdir=config.vars['OUTPUT'])
        write_vars(config_vars=config.vars,
                   outputfile=config.vars['OUTPUT'] / 'inputs.txt')
        script = srcdirectory / 'main.sbatch'
        # In this branch the SBATCH value is used as the --mail-user address.
        email = str(config.vars['SBATCH'])
        error_file = config.vars['E_AND_O'] / (
            'TFEA_' + config.vars['OUTPUT'].name + '.err')

        # Work on a copy so the process-wide sys.argv is not rewritten.
        args = list(sys.argv)
        # The resubmitted command carries the internal 'SUBMITTED' flag so
        # the job itself takes the branch above instead of submitting again.
        if '--sbatch' in args:
            args[args.index('--sbatch') + 1] = 'SUBMITTED'
        else:
            args.append('--sbatch')
            args.append('SUBMITTED')
        if '--venv' in args:
            venv = args[args.index('--venv') + 1]
        else:
            venv = '.'

        # NOTE(review): the operand of "--mail-user=" was masked in the
        # reviewed copy of this file; `email` is assumed to be the intended
        # value -- confirm against upstream.
        sbatch_command = [
            "sbatch",
            "--error=" + (config.vars['E_AND_O'] / "%x.err").as_posix(),
            "--output=" + (config.vars['E_AND_O'] / "%x.out").as_posix(),
            "--mail-user=" + email,
            "--export=cmd=" + ' '.join(args) + ',' + 'venv=' + venv,
            "--job-name=TFEA_" + config.vars['OUTPUT'].name,
            "--ntasks=" + str(config.vars['CPUS']),
            "--mem=" + str(config.vars['MEM']),
            "--time=" + str(config.vars['TIME']),
            "--partition=" + str(config.vars['PARTITION']),
            script,
        ]
        try:
            sbatch_out = subprocess.run(sbatch_command,
                                        stderr=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        check=True)
        except subprocess.CalledProcessError as e:
            raise exceptions.SubprocessError(e.stderr.decode()) from e

        # The last whitespace-separated token of sbatch's stdout is stored as
        # the job id.
        (config.vars['TEMPDIR'] / 'jobid.txt').write_text(
            sbatch_out.stdout.decode().split()[-1])
        print(("TFEA has been submitted using an sbatch script. \nIt can be "
               "monitored using:\ntail -f " + error_file.as_posix()))
        sys.exit()
Exemple #4
0
def mumerge(input_file,
            output_basename,
            bed1=[],
            bed2=[],
            label1=None,
            label2=None,
            mumerge_path=Path(__file__).absolute().parent / 'mumerge.py'):
    '''Write a MuMerge input file for two groups of bed files and run MuMerge
        (a script written by Jacob T. Stanley) on it.

    Parameters
    ----------
    input_file : path to .txt file
        Location where the MuMerge input file is written (any existing file
        is overwritten). The file is tab delimited with a header line:

            #file   sampid  group
            /full/file/path/filename1.bed   sampid1 A
            /full/file/path/filename2.bed   sampid2 B
            ...

        Each entry of bed1 gets sample id label1 + its 1-based position and
        group label1; entries of bed2 are written afterwards the same way
        using label2.

    output_basename : path to output file without file extension
        Passed to MuMerge's '-o' option. From the MuMerge doc:
            Output file basename (full path, sans extension).
            WARNING: will overwrite any existing file)

    bed1, bed2 : iterables of bed file paths
        The bed files of the first and the second group.

    label1, label2 : string
        Group labels for bed1 and bed2.

    mumerge_path : path
        Location of the mumerge.py script that is run with python3.

    Returns
    -------
    combined_file : pathlib.Path
        output_basename with '_MUMERGE.bed' appended

    Raises
    ------
    exceptions.SubprocessError
        if MuMerge exits with a non-zero status; the message is its stderr
    '''
    with open(input_file, 'w') as handle:
        handle.write("#file\tsampid\tgroup\n")
        for label, bedpaths in ((label1, bed1), (label2, bed2)):
            handle.writelines(f'{bedpath}\t{label}{number}\t{label}\n'
                              for number, bedpath in enumerate(bedpaths, 1))

    try:
        subprocess.check_output(
            ['python3', mumerge_path, '-i', input_file, '-o', output_basename],
            stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        raise exceptions.SubprocessError(e.stderr.decode())

    return Path(f'{output_basename}_MUMERGE.bed')
Exemple #5
0
def meme_logo(motif_file, motif_ID, figuredir, plot_format=None):
    '''Run meme2images for one motif, then convert the forward and reverse
        complement .eps logos in figuredir to .png with ImageMagick.

    Parameters
    ----------
    motif_file : path
        motif database handed to meme2images

    motif_ID : string
        motif passed to meme2images via -motif; the logo file names are
        looked up with every '.' in the ID replaced by '_'

    figuredir : pathlib.Path
        directory meme2images writes into and where the .png files are made

    plot_format : object
        not used by this function

    Raises
    ------
    exceptions.SubprocessError
        if any of the three commands exits with a non-zero status; the
        message is that command's stderr
    '''
    commands = [['meme2images', '-rc', '-eps', '-motif', motif_ID,
                 motif_file, figuredir]]

    # The logo file names use underscores where the motif ID has dots.
    safe_id = motif_ID.replace('.', '_')
    for prefix in ('logo', 'logo_rc'):
        stem = prefix + safe_id
        commands.append(['convert',
                         figuredir / (stem + '.eps'),
                         figuredir / (stem + '.png')])

    try:
        # Order matters: the logos have to exist before they are converted.
        for command in commands:
            subprocess.check_output(command, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        raise exceptions.SubprocessError(e.stderr.decode())
    return
Exemple #6
0
def getfasta(bedfile=None, genomefasta=None, tempdir=None, outname=None):
    '''Extract the sequences of the regions in a bed file into a fasta file
        using bedtools getfasta.

    Parameters
    ----------
    bedfile : string
        full path to a bed file

    genomefasta : string
        full path to a fasta file for the genome of interest

    tempdir : pathlib.Path
        directory the fasta file is written to

    outname : string
        file name of the fasta file inside tempdir

    Returns
    -------
    pathlib.Path
        tempdir / outname, the fasta file written by bedtools

    Raises
    ------
    exceptions.SubprocessError
        if bedtools exits with a non-zero status; the message is its stderr
    '''
    output_path = tempdir / outname

    try:
        subprocess.run(
            [
                "bedtools", "getfasta",
                "-fi", genomefasta,
                "-bed", bedfile,
                "-fo", output_path,
            ],
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE,
            check=True)
    except subprocess.CalledProcessError as e:
        raise exceptions.SubprocessError(e.stderr.decode())

    return output_path
Exemple #7
0
def bedtools_closest(motif,
                     genomehits=None,
                     ranked_center_file=None,
                     tempdir=None,
                     distance_cutoff=None,
                     rank_index=None):
    '''Find, for every region in ranked_center_file, the distance to the
        closest hit of one motif using bedtools closest.

    Parameters
    ----------
    motif : string
        file name of the motif hit bed file inside genomehits

    genomehits : pathlib.Path
        directory containing the motif hit bed files

    ranked_center_file : path
        bed file handed to bedtools closest as the -a file

    tempdir : pathlib.Path
        directory for the temporary bedtools output, which is removed again
        before returning

    distance_cutoff : int
        distances whose absolute value exceeds this are reported as '.'

    rank_index : int or None
        column of the bedtools output holding the rank (the text after the
        last ',' of that column is used); when given, the distances are
        returned ordered by rank instead of in file order

    Returns
    -------
    list
        the motif name without its '.bed' suffix followed by one entry per
        line: an int distance, or '.' where the distance exceeds the cutoff.
        If the motif hit file is empty, one '.' per line of
        ranked_center_file is returned without running bedtools.

    Raises
    ------
    exceptions.SubprocessError
        if bedtools exits with a non-zero status; the message is its stderr
    '''
    # str.strip('.bed') removes any of the characters '.', 'b', 'e', 'd' from
    # BOTH ends of the name, so the suffix is removed explicitly instead.
    motif_name = motif[:-len('.bed')] if motif.endswith('.bed') else motif

    try:
        motif_path = genomehits / motif
        if os.stat(motif_path).st_size == 0:
            # No hits at all: every region gets '.'. Count lines (one region
            # per line), not bytes.
            with open(ranked_center_file) as F:
                region_count = sum(1 for _ in F)
            return [motif_name] + ['.'] * region_count

        command = ("bedtools", "closest", "-D", "ref", "-t", "first", "-a",
                   ranked_center_file, "-b", motif_path)
        closest_out = tempdir / (motif + '.closest.bed')

        distances = []
        ranks = []
        try:
            closest_out.write_bytes(
                subprocess.check_output(command, stderr=subprocess.PIPE))
            with open(closest_out) as F:
                for line in F:
                    linelist = line.strip('\n').split('\t')
                    # The distance is read from the last column of each line.
                    distance = int(linelist[-1])
                    if rank_index is not None:
                        ranks.append(int(linelist[rank_index].split(',')[-1]))
                    if abs(distance) <= distance_cutoff:
                        distances.append(distance)
                    else:
                        distances.append('.')
        except subprocess.CalledProcessError as e:
            raise exceptions.SubprocessError(e.stderr.decode()) from e
        finally:
            # Remove the temporary file on failure as well as on success.
            if closest_out.exists():
                closest_out.unlink()

        if rank_index is not None:
            # Sort on the rank only: comparing the distances themselves would
            # fail for tied ranks because they mix ints and '.'.
            distances = [
                x for _, x in sorted(zip(ranks, distances),
                                     key=lambda pair: pair[0])
            ]

    except Exception:
        # Print the stack trace of the exception being handled before
        # re-raising it unchanged.
        traceback.print_exc()
        raise

    return [motif_name] + distances