def get_motif_from_tsv(genom, config):
    """Build a motif from the rows of a tab-separated file.

    The table at ``config.motif_file`` is read with a tab delimiter and
    narrowed to the rows whose ``TF`` column equals ``config.TF`` and whose
    ``genome_accession`` column equals ``genome.accession_full(genom)``.
    The surviving rows are passed to ``run_lasagna`` together with the
    genome, and its result is used to construct the returned motif.
    """
    table = pd.read_csv(config.motif_file, sep='\t')
    # Both conditions must hold for a row to be kept.
    tf_matches = table.TF == config.TF
    accession_matches = table.genome_accession == genome.accession_full(genom)
    selected = table[tf_matches & accession_matches]
    return motif.new_motif(run_lasagna(selected, genom))
def random_motif(ref, target, config):
    """Build a motif from random subsequences of the target genome.

    The true motif of the reference determines the sampling: one random
    sequence is drawn from ``target`` for each of its ``motif.size`` sites,
    and every drawn sequence has the reference's ``motif.length``.
    """
    reference_motif = get_true_motif(ref, config)
    site_length = motif.length(reference_motif)
    site_count = motif.size(reference_motif)
    random_sites = []
    for _ in range(site_count):
        random_sites.append(genome.random_seq(target, site_length))
    return motif.new_motif(random_sites)
def meme_on_pssm_searched(ref, target, config, seeded):
    """Run motif discovery on regions around PSSM hits in the target genome.

    The reference's true motif is searched against the target promoters,
    requesting ``int(2.15 * k)`` sites where ``k`` is the size of the true
    motif. Each reported site is expanded with ``sequence.expand``, the
    expanded sequences are merged where they overlap, and the merged
    regions are given to ``meme.motif_discovery`` as input sequences.

    Args:
        ref: reference genome; passed to ``get_meme_settings`` and
            ``get_true_motif``.
        target: target genome whose promoters are scanned.
        config: run configuration; passed to ``get_meme_settings`` and
            ``get_true_motif``.
        seeded: forwarded to ``get_meme_settings``.

    Returns:
        A list with one motif per site collection yielded by
        ``meme.motif_discovery``.
    """
    meme_settings = get_meme_settings(config, ref, seeded)
    true_motif = get_true_motif(ref, config)
    nsites = int(2.15 * motif.size(true_motif))
    psites = motif.pssm_search_on_regions(
        true_motif, genome.promoters(target), nsites)
    regions = sequence.merge_overlapping_seqs(
        [sequence.expand(site) for site in psites])
    for reg in regions:
        # One pre-formatted string prints the same space-separated line under
        # Python 2 and Python 3; the former ``print a, b, c`` statement is a
        # SyntaxError on Python 3.
        # NOTE(review): this looks like diagnostic output -- confirm whether
        # it should stay on stdout.
        print("%s %s %s" % (reg.start, reg.end, reg.strand))
    motifs = [
        motif.new_motif(sites)
        for sites in meme.motif_discovery(regions, meme_settings)
    ]
    return motifs
def network_transfer(ref, target, config, seeded):
    """Discover motifs in the promoters of the inferred target regulon.

    Every gene in the reference's true regulon that has a truthy entry in
    the ortholog mapping contributes the target operon of its ortholog.
    The upstream region of the first gene of each distinct operon is
    collected, overlapping regions are merged, and the result is given to
    ``meme.motif_discovery``. ``seeded`` is forwarded to
    ``get_meme_settings``. When fewer than three promoter regions are
    found, all promoters of the target genome are used instead.

    Returns a list with one motif per site collection yielded by
    ``meme.motif_discovery``.
    """
    meme_settings = get_meme_settings(config, ref, seeded)
    reference_motif = get_true_motif(ref, config)
    reference_regulon = motif.regulon(reference_motif)
    ortholog_map = genome.orthologs(ref, target, config.ortholog_dir)

    # Map each reference gene with a known ortholog to its target operon.
    target_operons = []
    for ref_operon in reference_regulon:
        for ref_gene in ref_operon:
            if orthologs_has_entry(ortholog_map, ref_gene):
                target_operons.append(
                    gene.operon(ortholog_map[ref_gene], target))
    inferred_regulon = listutils.nub(target_operons)

    upstream_regions = []
    for target_operon in inferred_regulon:
        upstream_regions.append(gene.upstream_region(target, target_operon[0]))
    promoters = sequence.merge_overlapping_seqs(upstream_regions)

    # Too few inferred promoters: fall back to every promoter in the target.
    if len(promoters) < 3:
        promoters = genome.promoters(target)

    discovered = []
    for sites in meme.motif_discovery(promoters, meme_settings):
        discovered.append(motif.new_motif(sites))
    return discovered


def orthologs_has_entry(ortholog_map, ref_gene):
    """Return the (possibly falsy) mapping entry for ``ref_gene``, or None."""
    return ortholog_map.get(ref_gene, None)
def direct_transfer(ref, target, config):
    """Build a target motif from a PSSM search with the reference motif.

    The reference's true motif is searched against the promoters of the
    target genome, requesting ``int(1.15 * k)`` sites where ``k`` is the
    size of the true motif. The sites returned by the search are used to
    construct the resulting motif.
    """
    reference_motif = get_true_motif(ref, config)
    site_budget = int(1.15 * motif.size(reference_motif))
    target_promoters = genome.promoters(target)
    hits = motif.pssm_search_on_regions(
        reference_motif, target_promoters, site_budget)
    return motif.new_motif(hits)