def get_motif_from_tsv(genom, config):
    """Build a motif from the tab-separated table at config.motif_file.

    Only rows whose TF column equals config.TF and whose genome_accession
    column equals genome.accession_full(genom) are kept. The remaining rows
    are passed to run_lasagna together with `genom`, and its result is
    wrapped with motif.new_motif.
    """
    table = pd.read_csv(config.motif_file, sep='\t')
    tf_matches = table.TF == config.TF
    genome_matches = table.genome_accession == genome.accession_full(genom)
    selected = table[tf_matches & genome_matches]
    return motif.new_motif(run_lasagna(selected, genom))
def random_motif(ref, target, config):
    """Build a motif out of random sequences drawn from the target genome.

    The true motif of `ref` fixes the shape: as many random sequences are
    generated as motif.size reports for it, each with the length that
    motif.length reports for it.
    """
    reference_motif = get_true_motif(ref, config)
    site_length = motif.length(reference_motif)
    site_count = motif.size(reference_motif)
    random_sites = []
    for _ in range(site_count):
        random_sites.append(genome.random_seq(target, site_length))
    return motif.new_motif(random_sites)
def meme_on_pssm_searched(ref, target, config, seeded):
    """Run motif discovery on target regions hit by the reference motif.

    The true motif of `ref` is searched against the promoters of `target`.
    Every reported site is expanded, overlapping expansions are merged, and
    the merged regions are handed to meme.motif_discovery as its input
    sequences.

    Args:
        ref: reference genome; used to obtain the true motif and the MEME
            settings.
        target: genome whose promoters are scanned.
        config: configuration forwarded to get_meme_settings and
            get_true_motif.
        seeded: forwarded unchanged to get_meme_settings.

    Returns:
        A list containing one motif (motif.new_motif) per collection of
        sites yielded by meme.motif_discovery.
    """
    meme_settings = get_meme_settings(config, ref, seeded)
    true_motif = get_true_motif(ref, config)
    # Request 2.15 times as many sites as the true motif has (truncated).
    nsites = int(2.15 * motif.size(true_motif))
    psites = motif.pssm_search_on_regions(
        true_motif, genome.promoters(target), nsites)
    regions = sequence.merge_overlapping_seqs(
        [sequence.expand(site) for site in psites])
    for reg in regions:
        # A single pre-formatted argument prints the same "start end strand"
        # line whether `print` is the Python 2 statement or the Python 3
        # function, so the module no longer fails to parse on Python 3.
        print("%s %s %s" % (reg.start, reg.end, reg.strand))
    return [
        motif.new_motif(sites)
        for sites in meme.motif_discovery(regions, meme_settings)
    ]
def network_transfer(ref, target, config, seeded):
    """Discover motifs in the promoters of the regulon inferred by orthology.

    The regulon of the reference's true motif is mapped onto `target` gene by
    gene through the ortholog table. The upstream regions of the resulting
    operons are merged and given to meme.motif_discovery. `seeded` is
    forwarded to get_meme_settings. Returns a list with one motif per
    collection of sites that motif discovery yields.
    """
    meme_settings = get_meme_settings(config, ref, seeded)
    reference_motif = get_true_motif(ref, config)
    reference_regulon = motif.regulon(reference_motif)
    ortholog_of = genome.orthologs(ref, target, config.ortholog_dir)

    # Collect the target operon of every reference gene that has a (truthy)
    # ortholog entry, then drop duplicates.
    target_operons = []
    for operon in reference_regulon:
        for ref_gene in operon:
            target_gene = ortholog_of.get(ref_gene)
            if target_gene:
                target_operons.append(gene.operon(target_gene, target))
    inferred_regulon = listutils.nub(target_operons)

    # The upstream region is taken from the first gene of each operon.
    upstream_regions = []
    for operon in inferred_regulon:
        upstream_regions.append(gene.upstream_region(target, operon[0]))
    search_space = sequence.merge_overlapping_seqs(upstream_regions)

    # With fewer than 3 promoters, fall back to every promoter of the target.
    if len(search_space) < 3:
        search_space = genome.promoters(target)

    discovered = []
    for sites in meme.motif_discovery(search_space, meme_settings):
        discovered.append(motif.new_motif(sites))
    return discovered
def direct_transfer(ref, target, config):
    """Build a target motif by searching its promoters with the true motif.

    The true motif of `ref` is searched against the promoters of `target`,
    requesting 1.15 times the true motif's size (truncated to an int) as the
    site count. The sites returned by the search form the new motif.
    """
    reference_motif = get_true_motif(ref, config)
    site_budget = int(1.15 * motif.size(reference_motif))
    target_promoters = genome.promoters(target)
    hits = motif.pssm_search_on_regions(
        reference_motif, target_promoters, site_budget)
    return motif.new_motif(hits)