Example #1
    def prepare_input_bed(self, inputfile, organism="hg18", width=200, fraction=0.2, abs_max=1000, use_strand=False):
        """ Create all the bed- and fasta-files necessary for motif prediction and validation """    
        self.inputfile = inputfile

        width = int(width)
        fraction = float(fraction)
        abs_max = int(abs_max)
        use_strand = bool(use_strand)

        self.logger.info("Preparing input (BED)")
        
        # Set all peaks to specific width
        self.logger.debug("Creating inputfile %s, width %s" % (self.input_bed, width))
        
    #    if not self.weird:
        write_equalwidth_bedfile(inputfile, width, self.input_bed)
        
        # Split input_bed in prediction and validation set 
        self.logger.debug("Splitting %s into prediction set (%s) and validation set (%s)" % (self.input_bed, self.prediction_bed, self.validation_bed))
        #if not self.weird:
        self.prediction_num, self.validation_num = divide_file(self.input_bed, self.prediction_bed, self.validation_bed, fraction, abs_max)
        
        
        # Make fasta files    
        index_dir = os.path.join(self.config.get_index_dir(), organism)
        self.logger.debug("Creating %s" % (self.prediction_fa))
        
        genome_index.track2fasta(index_dir, self.prediction_bed, self.prediction_fa, use_strand=use_strand, ignore_missing=True)
        self.logger.debug("Creating %s" % (self.validation_fa))
        genome_index.track2fasta(index_dir, self.validation_bed, self.validation_fa, use_strand=use_strand, ignore_missing=True)
Example #2
def as_fasta(seqs, index_dir=None):
    ftype = get_seqs_type(seqs)
    if ftype == "fasta":
        return seqs
    elif ftype == "fastafile":
        return Fasta(seqs)
    else:
        if index_dir is None:
            raise ValueError("need index_dir / genome to convert to FASTA")

        tmpfa = NamedTemporaryFile()
        
        if ftype == "bedfile":
            track2fasta(index_dir, seqs, tmpfa.name) 
        else:

            if ftype == "regionfile":
                seqs = [l.strip() for l in open(seqs).readlines()]
            tmpbed = NamedTemporaryFile()
            for seq in seqs:
                vals = re.split(r'[:-]', seq)
                tmpbed.write("{}\t{}\t{}\n".format(*vals))
            tmpbed.flush()
            track2fasta(index_dir, tmpbed.name, tmpfa.name) 
        return Fasta(tmpfa.name)
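
A minimal usage sketch of the as_fasta helper above may help; the region strings and the index path are illustrative assumptions, and the call presumes the module-level imports (get_seqs_type, track2fasta, Fasta, re, NamedTemporaryFile) and an indexed genome are in place.

# Hypothetical usage of as_fasta; the regions and index path are assumptions.
regions = ["chr1:100-300", "chr2:500-700"]  # each region is split by re.split(r'[:-]', seq) into BED fields
fa = as_fasta(regions, index_dir="/usr/share/gimmemotifs/genome_index/hg18")
print(fa.seqs)  # a Fasta object; .seqs is also used in the later examples
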
Example #3
	def prepare_input_bed(self, inputfile, organism="hg18", width=200, fraction=0.2, abs_max=1000, use_strand=False):
		""" Create all the bed- and fasta-files necessary for motif prediction and validation """	
		self.inputfile = inputfile

		width = int(width)
		fraction = float(fraction)
		abs_max = int(abs_max)
		use_strand = bool(use_strand)

		self.logger.info("Preparing input (BED)")
		
		# Set all peaks to specific width
		self.logger.debug("Creating inputfile %s, width %s" % (self.input_bed, width))
		
		if not self.weird:
			write_equalwidth_bedfile(inputfile, width, self.input_bed)
		
		# Split input_bed in prediction and validation set 
		self.logger.debug("Splitting %s into prediction set (%s) and validation set (%s)" % (self.input_bed, self.prediction_bed, self.validation_bed))
		if not self.weird:
			self.prediction_num, self.validation_num = divide_file(self.input_bed, self.prediction_bed, self.validation_bed, fraction, abs_max)
		
		
			# Make fasta files	
			index_dir = os.path.join(self.config.get_index_dir(), organism)
			self.logger.debug("Creating %s" % (self.prediction_fa))
			
			genome_index.track2fasta(index_dir, self.prediction_bed, self.prediction_fa, use_strand=use_strand)
			self.logger.debug("Creating %s" % (self.validation_fa))
			genome_index.track2fasta(index_dir, self.validation_bed, self.validation_fa, use_strand=use_strand)
Example #4
	def create_location_plots(self, motif_file, fasta_file, params):
		self.logger.info("Creating localization plots")
		index_dir = os.path.join(self.config.get_index_dir(), params["genome"])
		lwidth = int(params["lwidth"])
		width = int(params["width"])
		extend = (lwidth - width) / 2
		
		genome_index.track2fasta(index_dir, self.validation_bed, self.location_fa, extend_up=extend, extend_down=extend, use_strand=params["use_strand"])

		jobs = []
		motifs = pwmfile_to_motifs(motif_file)
		
		for motif in motifs:
			outfile = os.path.join(self.imgdir, "%s_histogram.svg" % motif.id)
			motif_localization(fasta_file, motif, lwidth, outfile)
Example #5
	def __init__(self, bedfile, genefile, index="/usr/share/gimmemotifs/genome_index/hg18", length=None, multiply=10, match_chromosome=True):
		self.match_chromosome = match_chromosome

		# Create temporary files
		tmpbed = NamedTemporaryFile().name
		tmpfasta = NamedTemporaryFile().name
		
		# Create bed-file with coordinates of random sequences
		self._create_bedfile(tmpbed, bedfile, genefile, length, multiply)
		
		# Convert track to fasta
		track2fasta(index, tmpbed, tmpfasta)

		# Initialize super Fasta object
		Fasta.__init__(self, tmpfasta)

		# Delete the temporary files
		os.remove(tmpbed)
		os.remove(tmpfasta)
Example #6
    def __init__(self, index="/usr/share/gimmemotifs/genome_index/hg18", length=None, n=None):
        length = int(length)

        # Create temporary files
        tmpbed = NamedTemporaryFile(dir=mytmpdir()).name
        tmpfasta = NamedTemporaryFile(dir=mytmpdir()).name
        
        # Create bed-file with coordinates of random sequences
        create_random_genomic_bedfile(tmpbed, index, length, n)
        
        # Convert track to fasta
        track2fasta(index, tmpbed, tmpfasta, use_strand=True)

        # Initialize super Fasta object
        Fasta.__init__(self, tmpfasta)

        # Delete the temporary files
        os.remove(tmpbed)
        os.remove(tmpfasta)
Example #7
	def __init__(self, genefile, index="/usr/share/gimmemotifs/genome_index/hg18", length=None, n=None):
		length = int(length)

		# Create temporary files
		tmpbed = NamedTemporaryFile().name
		tmpfasta = NamedTemporaryFile().name
		
		# Create bed-file with coordinates of random sequences
		self._create_promoter_bedfile(tmpbed, genefile, length, n)
		
		# Convert track to fasta
		track2fasta(index, tmpbed, tmpfasta, use_strand=True)

		# Initialize super Fasta object
		Fasta.__init__(self, tmpfasta)

		# Delete the temporary files
		os.remove(tmpbed)
		os.remove(tmpfasta)
Example #8
    def __init__(self, matchfile, genome="hg19", number=None):
        config = MotifConfig()
        index = os.path.join(config.get_index_dir(), genome)

        # Create temporary files
        tmpbed = NamedTemporaryFile(dir=mytmpdir()).name
        tmpfasta = NamedTemporaryFile(dir=mytmpdir()).name
        
        # Create bed-file with coordinates of random sequences
        matched_gc_bedfile(tmpbed, matchfile, genome, number)
        
        # Convert track to fasta
        track2fasta(index, tmpbed, tmpfasta)

        # Initialize super Fasta object
        Fasta.__init__(self, tmpfasta)

        # Delete the temporary files
        os.remove(tmpbed)
        os.remove(tmpfasta)
Example #9
	def __init__(self, bedfile, genefile, index="/usr/share/gimmemotifs/genome_index/hg18", length=None, multiply=10, match_chromosome=True):
		self.match_chromosome = match_chromosome
		length = int(length)

		# Create temporary files
		tmpbed = NamedTemporaryFile().name
		tmpfasta = NamedTemporaryFile().name
		
		# Create bed-file with coordinates of random sequences
		self._create_bedfile(tmpbed, bedfile, genefile, length, multiply)
		
		# Convert track to fasta
		track2fasta(index, tmpbed, tmpfasta)

		# Initialize super Fasta object
		Fasta.__init__(self, tmpfasta)

		# Delete the temporary files
		os.remove(tmpbed)
		os.remove(tmpfasta)
Example #10
	def create_location_plots(self, motif_file, params):
		self.logger.info("Creating localization plots")
		if self.input_type == "BED":
		
			index_dir = os.path.join(self.config.get_index_dir(), params["genome"])
			lwidth = int(params["lwidth"])
			width = int(params["width"])
			extend = (lwidth - width) / 2
		
			genome_index.track2fasta(index_dir, self.validation_bed, self.location_fa, extend_up=extend, extend_down=extend, use_strand=params["use_strand"])
		else:
			self.location_fa = self.validation_fa
			fa = Fasta(self.location_fa)
			seqs = fa.seqs
			lwidth = len(seqs[0]) 
			all_same_width = not(False in [len(seq) == lwidth for seq in seqs])
			if not all_same_width:
				self.logger.warn("PLEASE NOTE: FASTA file contains sequences of different lengths. Positional preference plots will be incorrect!")
		
		motifs = pwmfile_to_motifs(motif_file)
		for motif in motifs:
			outfile = os.path.join(self.imgdir, "%s_histogram.svg" % motif.id)
			motif_localization(self.location_fa, motif, lwidth, outfile)
Example #11
    def run_full_analysis(self, inputfile, user_params=None):
        """ Full analysis: from bed-file to motifs (including clustering, ROC-curves, location plots and html report) """
        self.logger.info("starting full motif analysis")
        self.logger.debug("Using temporary directory {0}".format(mytmpdir()))

        if user_params is None:
            user_params = {}
        params = self.config.get_default_params()
        params.update(user_params)

        if params["torque"]:
            from gimmemotifs.prediction_torque import pp_predict_motifs, PredictionResult
            self.logger.debug("Using torque")
        else:
            from gimmemotifs.prediction import pp_predict_motifs, PredictionResult
            self.logger.debug("Using multiprocessing")

        self.params = params
        #self.weird = params["weird_option"]

        background = [x.strip() for x in params["background"].split(",")]

        self.logger.debug("Parameters:")
        for param, value in params.items():
            self.logger.debug("  %s: %s", param, value)

        # Checking input
        self.input_type = "BED"
        # If we can load it as fasta then it is a fasta, yeh?
        try:
            Fasta(inputfile)
            self.logger.debug("Inputfile is a FASTA file")
            self.input_type = "FASTA"
        except Exception:
            # Leave it to BED
            pass

        index_msg = ("No index found for genome {}! "
                     "Has GimmeMotifs been configured correctly and is the "
                     "genome indexed?").format(params["genome"])
        index_dir = os.path.join(self.config.get_index_dir(), params["genome"])

        if self.input_type == "FASTA":
            for bg in background:
                if not bg in FA_VALID_BGS:
                    self.logger.info(
                        "Input type is FASTA, can't use background type '%s'",
                        bg)
                if bg == "genomic":
                    if not os.path.exists(index_dir):
                        self.logger.error(index_msg)
                        sys.exit(1)
            background = [bg for bg in background if bg in FA_VALID_BGS]

        elif self.input_type == "BED":
            # Does the index_dir exist?  #bed-specific
            if not os.path.exists(index_dir):
                self.logger.error(index_msg)
                sys.exit(1)

            # is it a valid bed-file etc.
            self._check_input(inputfile)  # bed-specific

            # Check for valid background
            for bg in background:
                if not bg in BED_VALID_BGS:
                    self.logger.info(
                        "Input type is BED, can't use background type '%s'",
                        bg)
            background = [bg for bg in background if bg in BED_VALID_BGS]

        if len(background) == 0:
            self.logger.error("No valid backgrounds specified!")
            sys.exit(1)

        self.max_time = None
        max_time = None
        # Maximum time?
        if params["max_time"]:
            try:
                max_time = float(params["max_time"])
            except Exception:
                self.logger.debug(
                    "Could not parse max_time value, setting to no limit")
                self.max_time = None

            if max_time > 0:
                self.logger.debug(
                    "Time limit for motif prediction: %0.2f hours" % max_time)
                max_time = 3600 * max_time
                self.max_time = max_time
                self.logger.debug("Max_time in seconds %0.0f" % self.max_time)
            else:
                self.logger.debug(
                    "Invalid time limit for motif prediction, setting to no limit"
                )
                self.max_time = None
        else:
            self.logger.debug("No time limit for motif prediction")

        if "random" in background:
            self.markov_model = params["markov_model"]

        # Create the necessary files for motif prediction and validation
        if self.input_type == "BED":
            self.prepare_input_bed(inputfile, params["genome"],
                                   params["width"], params["fraction"],
                                   params["abs_max"], params["use_strand"])

            # Create file for location plots
            index_dir = os.path.join(self.config.get_index_dir(),
                                     params["genome"])
            lwidth = int(params["lwidth"])
            width = int(params["width"])
            extend = (lwidth - width) / 2
            genome_index.track2fasta(index_dir,
                                     self.validation_bed,
                                     self.location_fa,
                                     extend_up=extend,
                                     extend_down=extend,
                                     use_strand=params["use_strand"],
                                     ignore_missing=True)

        elif self.input_type == "FASTA":
            self.prepare_input_fa(inputfile, params["width"],
                                  params["fraction"], params["abs_max"])

            # File for location plots
            self.location_fa = self.validation_fa
            fa = Fasta(self.location_fa)
            seqs = fa.seqs
            lwidth = len(seqs[0])
            all_same_width = not (False
                                  in [len(seq) == lwidth for seq in seqs])
            if not all_same_width:
                self.logger.warn(
                    "PLEASE NOTE: FASTA file contains sequences of different lengths. Positional preference plots will be incorrect!"
                )

        else:
            self.logger.error("Unknown input type, shouldn't happen")
            sys.exit(1)

        tools = dict([(x.strip(), x
                       in [y.strip() for y in params["tools"].split(",")])
                      for x in params["available_tools"].split(",")])

        self.create_background(background, params["genome"], params["width"])

        # Predict the motifs
        analysis = params["analysis"]
        """ Predict motifs, input is a FASTA-file"""
        self.logger.info("starting motif prediction (%s)", analysis)
        self.logger.info("tools: %s",
                         ", ".join([x for x in tools.keys() if tools[x]]))

        bg_file = self.bg_file["fa"][sorted(
            background, lambda x, y: cmp(BG_RANK[x], BG_RANK[y]))[0]]
        self.logger.debug("Using bg_file %s for significance" % bg_file)
        result = pp_predict_motifs(self.prediction_fa,
                                   self.predicted_pfm,
                                   analysis,
                                   params["genome"],
                                   params["use_strand"],
                                   self.prediction_bg,
                                   tools,
                                   self.job_server(),
                                   logger=self.logger,
                                   max_time=self.max_time,
                                   fg_file=self.validation_fa,
                                   bg_file=bg_file)

        motifs = result.motifs
        self.logger.info("predicted %s motifs", len(motifs))
        self.logger.debug("written to %s", self.predicted_pfm)

        if len(motifs) == 0:
            self.logger.info("no motifs found")
            sys.exit()

        # Write stats output to file
        f = open(self.stats_file, "w")
        stat_keys = result.stats.values()[0].keys()
        f.write("%s\t%s\n" % ("Motif", "\t".join(stat_keys)))

        self.logger.debug(result.stats)

        for motif in motifs:
            stats = result.stats["%s_%s" % (motif.id, motif.to_consensus())]
            if stats:
                f.write(
                    "%s\t%s\n" %
                    (motif.id, "\t".join([str(stats[k]) for k in stat_keys])))
            else:
                self.logger.error(
                    "No stats for motif {0}, skipping this motif!".format(
                        motif.id))
                motifs.remove(motif)
        f.close()

        self.motifs_with_stats = motifs

        f = open(self.ranks_file, "w")
        tools = dict((m.id.split("_")[0], 1) for m in motifs).keys()
        f.write("Metric\tType\t%s\n" % ("\t".join(tools)))
        for stat in ["mncp", "roc_auc", "maxenr"]:
            best_motif = {}
            for motif in self.motifs_with_stats:
                val = result.stats["%s_%s" %
                                   (motif.id, motif.to_consensus())][stat]
                name = motif.id.split("_")[0]
                if val > best_motif.setdefault(name, 0):
                    best_motif[name] = val
            names = best_motif.keys()
            vals = [best_motif[name] for name in names]
            rank = rankdata(vals)
            ind = [names.index(x) for x in tools]

            f.write("%s\t%s\t%s\n" %
                    (stat, "value", "\t".join([str(vals[i]) for i in ind])))
            f.write("%s\t%s\t%s\n" %
                    (stat, "rank", "\t".join([str(rank[i]) for i in ind])))
        f.close()

        #self.logger.debug("RANK: %s" % stat)
        #self.logger.debug("\t".join([str(x) for x in names]))
        #self.logger.debug("\t".join([str(x) for x in vals]))
        #self.logger.debug("\t".join([str(x) for x in rank]))

        # Determine significant motifs
        nsig = 0
        f = open(self.significant_pfm, "w")
        for motif in motifs:
            stats = result.stats["%s_%s" % (motif.id, motif.to_consensus())]
            if stats["maxenr"] >= 3 and stats["roc_auc"] >= 0.55 and stats[
                    'enr_fdr'] >= 2:
                f.write("%s\n" % motif.to_pfm())
                nsig += 1
        f.close()
        self.logger.info("%s motifs are significant", nsig)
        self.logger.debug("written to %s", self.significant_pfm)

        if nsig == 0:
            self.logger.info("no significant motifs found")
            return

        # ROC metrics of significant motifs
        for bg in background:
            self._roc_metrics(self.significant_pfm, self.validation_fa,
                              self.bg_file["fa"][bg], self.bg_file["roc"][bg])

        # Cluster significant motifs
        clusters = self._cluster_motifs(self.significant_pfm, self.cluster_pwm,
                                        self.outdir,
                                        params["cluster_threshold"])

        # Determine best motif in cluster

        num_cluster, best_id = self._determine_best_motif_in_cluster(
            clusters, self.final_pwm, self.validation_fa, bg_file, self.imgdir)

        ### Enable parallel and modular evaluation of results
        # Scan (multiple) files with motifs
        # Define callback functions once scanning is finished:
        #    - ROC plot
        #     - Statistics
        #    - Location plots (histogram)
        #     -

        # Stars
        tmp = NamedTemporaryFile(dir=mytmpdir()).name
        p = PredictionResult(tmp,
                             logger=self.logger,
                             job_server=self.server,
                             fg_file=self.validation_fa,
                             bg_file=bg_file,
                             do_counter=False)
        p.add_motifs(
            ("clustering", (read_motifs(open(self.final_pwm)), "", "")))
        while len(p.stats.keys()) < len(p.motifs):
            sleep(5)

        #print "p.stats"
        #print p.stats
        #print "num_cluster"
        #print num_cluster
        for mid, num in num_cluster.items():
            p.stats[mid]["numcluster"] = num

        all_stats = {
            "mncp": [2, 5, 8],
            "roc_auc": [0.6, 0.75, 0.9],
            "maxenr": [10, 20, 30],
            "enr_fdr": [4, 8, 12],
            "fraction": [0.4, 0.6, 0.8],
            "ks_sig": [4, 7, 10],
            "numcluster": [3, 6, 9],
        }

        self.logger.info("creating report")

        # ROC plots
        for bg in background:
            self.create_roc_plots(self.final_pwm, self.validation_fa,
                                  self.bg_file["fa"][bg], bg)

        # Location plots
        self.logger.debug("Creating localization plots")
        motifs = read_motifs(open(self.final_pwm), fmt="pwm")
        for motif in motifs:
            m = "%s_%s" % (motif.id, motif.to_consensus())
            s = p.stats[m]
            outfile = os.path.join(self.imgdir, "%s_histogram.svg" % motif.id)
            motif_localization(self.location_fa,
                               motif,
                               lwidth,
                               outfile,
                               cutoff=s["cutoff_fdr"])

            s["stars"] = int(
                mean([star(s[x], all_stats[x])
                      for x in all_stats.keys()]) + 0.5)
            self.logger.debug("Motif %s: %s stars" % (m, s["stars"]))

        # Calculate enrichment of final, clustered motifs
        self.calculate_cluster_enrichment(self.final_pwm, background)

        # Create report
        self.print_params()
        self._calc_report_values(self.final_pwm, background)
        self._create_report(self.final_pwm,
                            background,
                            stats=p.stats,
                            best_id=best_id)
        self._create_text_report(self.final_pwm, background)

        self.logger.info("finished")
        self.logger.info("output dir: %s", os.path.split(self.motif_report)[0])
        self.logger.info("report: %s", os.path.split(self.motif_report)[-1])
        #self.logger.info("Open %s in your browser to see your results." % (self.motif_report))

        if not (params["keep_intermediate"]):

            self.logger.debug(
                "Deleting intermediate files. Please specifify the -k option if you want to keep these files."
            )
            shutil.rmtree(self.tmpdir)

        self.logger.debug("Done")

        return self.motif_report
Example #12
    def run_full_analysis(self, inputfile, user_params=None):
        """ Full analysis: from bed-file to motifs (including clustering, ROC-curves, location plots and html report) """
        self.logger.info("Starting full motif analysis")
        self.logger.info("Using temporary directory {0}".format(mytmpdir()))
    
        if user_params is None:
            user_params = {}
        params = self.config.get_default_params()
        params.update(user_params)
        
        if params["torque"]:
            from gimmemotifs.prediction_torque import pp_predict_motifs, PredictionResult
            self.logger.info("Using torque")
        else:
            from gimmemotifs.prediction import pp_predict_motifs, PredictionResult
            self.logger.info("Using multiprocessing")

        self.params = params
        #self.weird = params["weird_option"]

        background = [x.strip() for x in params["background"].split(",")]
        
        self.logger.info("Parameters:")
        for param, value in params.items():
            self.logger.info("  %s: %s" % (param, value))

        # Checking input
        self.input_type = "BED"
        # If we can load it as fasta then it is a fasta, yeh?
        try:
            Fasta(inputfile)
            self.logger.info("Inputfile is a FASTA file")
            self.input_type = "FASTA"
        except:
            # Leave it to BED
            pass

        if self.input_type == "FASTA":
            for bg in background:
                if not bg in FA_VALID_BGS:
                    self.logger.info("Input type is FASTA, can't use background type '%s'" % bg)
            background = [bg for bg in background if bg in FA_VALID_BGS]
            
        elif self.input_type == "BED":
            # Does the index_dir exist?  #bed-specific
            index_dir = os.path.join(self.config.get_index_dir(), params["genome"])
            if not os.path.exists(index_dir):
                self.logger.error("No index found for genome %s! Has GimmeMotifs been configured correctly and is the genome indexed?" % params["genome"])
                sys.exit(1)

            # is it a valid bed-file etc.
            self._check_input(inputfile)    # bed-specific

            # Check for valid background
            for bg in background:
                if not bg in BED_VALID_BGS:
                    self.logger.info("Input type is BED, can't use background type '%s'" % bg)
            background = [bg for bg in background if bg in BED_VALID_BGS]
    
        if len(background) == 0:
            self.logger.error("No valid backgrounds specified!")
            sys.exit(1)

        self.max_time = None
        max_time = None
        # Maximum time?
        if params["max_time"]:
            try:
                max_time = float(params["max_time"])
            except:
                self.logger.info("Could not parse max_time value, setting to no limit")
                self.max_time = None

            if max_time > 0:
                self.logger.info("Time limit for motif prediction: %0.2f hours" % max_time)
                max_time = 3600 * max_time
                self.max_time = max_time
                self.logger.debug("Max_time in seconds %0.0f" % self.max_time)
            else:
                self.logger.info("Invalid time limit for motif prediction, setting to no limit")
                self.max_time = None
        else:
                self.logger.info("No time limit for motif prediction")
            
        if "random" in background:
            self.markov_model = params["markov_model"]

        # Create the necessary files for motif prediction and validation
        if self.input_type == "BED":
            self.prepare_input_bed(inputfile, params["genome"], params["width"], params["fraction"], params["abs_max"], params["use_strand"])

            # Create file for location plots
            index_dir = os.path.join(self.config.get_index_dir(), params["genome"])
            lwidth = int(params["lwidth"])
            width = int(params["width"])
            extend = (lwidth - width) / 2
            genome_index.track2fasta(index_dir, self.validation_bed, self.location_fa, extend_up=extend, extend_down=extend, use_strand=params["use_strand"], ignore_missing=True)
        
        elif self.input_type == "FASTA":
            self.prepare_input_fa(inputfile, params["width"], params["fraction"], params["abs_max"])
        
            # File for location plots
            self.location_fa = self.validation_fa
            fa = Fasta(self.location_fa)
            seqs = fa.seqs
            lwidth = len(seqs[0]) 
            all_same_width = not(False in [len(seq) == lwidth for seq in seqs])
            if not all_same_width:
                self.logger.warn("PLEASE NOTE: FASTA file contains sequences of different lengths. Positional preference plots will be incorrect!")
        
        else:
            self.logger.error("Unknown input type, shouldn't happen")
            sys.exit(1)

        tools = dict([(x.strip(), x in [y.strip() for y in  params["tools"].split(",")]) for x in params["available_tools"].split(",")])
    
        self.create_background(background, params["genome"], params["width"])

        # Predict the motifs
        analysis = params["analysis"]
        """ Predict motifs, input is a FASTA-file"""
        self.logger.info("Starting motif prediction (%s) using %s" % 
            (analysis, ", ".join([x for x in tools.keys() if tools[x]])))

        bg_file = self.bg_file["fa"][sorted(background, lambda x,y: cmp(BG_RANK[x], BG_RANK[y]))[0]]
        self.logger.info("Using bg_file %s for significance" % bg_file)
        result = pp_predict_motifs(self.prediction_fa, self.predicted_pfm, analysis, params["genome"], params["use_strand"], self.prediction_bg, tools, self.job_server(), logger=self.logger, max_time=self.max_time, fg_file=self.validation_fa, bg_file=bg_file)
    
        motifs = result.motifs
        self.logger.info("Predicted %s motifs, written to %s" % (len(motifs), self.predicted_pfm))
        
        if len(motifs) == 0:
            self.logger.info("No motifs found. Done.")
            sys.exit()
        
        # Write stats output to file
        f = open(self.stats_file, "w")
        stat_keys = result.stats.values()[0].keys()
        f.write("%s\t%s\n" % ("Motif", "\t".join(stat_keys)))
        print result.stats
        for motif in motifs:
            stats = result.stats["%s_%s" % (motif.id, motif.to_consensus())]
            if stats:
                f.write("%s\t%s\n" % (motif.id, "\t".join([str(stats[k]) for k in stat_keys])))
            else:
                self.logger.error("No stats for motif {0}, skipping this motif!".format(motif.id))
                motifs.remove(motif)
        f.close()
    
        self.motifs_with_stats = motifs

        f = open(self.ranks_file, "w")
        tools = dict((m.id.split("_")[0],1) for m in motifs).keys()
        f.write("Metric\tType\t%s\n" % ("\t".join(tools)))
        for stat in ["mncp", "roc_auc", "maxenr"]:
            best_motif = {}
            for motif in self.motifs_with_stats:
                val = result.stats["%s_%s" % (motif.id, motif.to_consensus())][stat]
                name = motif.id.split("_")[0]
                if val > best_motif.setdefault(name, 0):
                    best_motif[name] = val
            names = best_motif.keys()
            vals = [best_motif[name] for name in names]
            rank = rankdata(vals)
            ind = [names.index(x) for x in tools]
            
            f.write("%s\t%s\t%s\n" % (stat, "value", "\t".join([str(vals[i]) for i in ind])))
            f.write("%s\t%s\t%s\n" % (stat, "rank", "\t".join([str(rank[i]) for i in ind])))
        f.close()
            
            
            #self.logger.debug("RANK: %s" % stat)
            #self.logger.debug("\t".join([str(x) for x in names]))
            #self.logger.debug("\t".join([str(x) for x in vals]))
            #self.logger.debug("\t".join([str(x) for x in rank]))

        # Determine significant motifs
        nsig = 0 
        f = open(self.significant_pfm, "w")
        for motif in motifs:
            stats = result.stats["%s_%s" % (motif.id, motif.to_consensus())]
            if stats["maxenr"] >= 3 and stats["roc_auc"] >= 0.55 and stats['enr_fdr'] >= 2:
                f.write("%s\n" % motif.to_pfm())
                nsig += 1
        f.close()        
        self.logger.info("%s motifs are significant, written to %s" % (nsig, self.significant_pfm))
        
        if nsig == 0:
            self.logger.info("No significant motifs found. Done.")
            sys.exit()
        
        # ROC metrics of significant motifs
        for bg in background:
            self._roc_metrics(self.significant_pfm, self.validation_fa, self.bg_file["fa"][bg], self.bg_file["roc"][bg])
        
        # Cluster significant motifs
        clusters = self._cluster_motifs(self.significant_pfm, self.cluster_pwm, self.outdir, params["cluster_threshold"])
        
        # Determine best motif in cluster
        num_cluster, best_id = self._determine_best_motif_in_cluster(clusters, self.final_pwm, self.validation_fa, bg_file, self.imgdir)
        
        ### Enable parallel and modular evaluation of results
        # Scan (multiple) files with motifs
        # Define callback functions once scanning is finished:
        #    - ROC plot
        #     - Statistics
        #    - Location plots (histogram)
        #     -
        
        # Stars
        tmp = NamedTemporaryFile(dir=mytmpdir()).name
        p = PredictionResult(tmp, logger=self.logger, job_server=self.server, fg_file = self.validation_fa, bg_file = bg_file) 
        p.add_motifs(("Clustering",  (pwmfile_to_motifs(self.final_pwm), "","")))
        while len(p.stats.keys()) < len(p.motifs):
            sleep(5)

        for mid, num in num_cluster.items():
            p.stats[mid]["numcluster"] = num

        all_stats = {
            "mncp": [2, 5, 8],                
            "roc_auc": [0.6, 0.75, 0.9],    
            "maxenr": [10, 20, 30],         
            "enr_fdr": [4, 8, 12],         
            "fraction": [0.4, 0.6, 0.8],    
            "ks_sig": [4, 7, 10],
            "numcluster": [3, 6, 9],
        }

        
        # ROC plots
        for bg in background:
            self.create_roc_plots(self.final_pwm, self.validation_fa, self.bg_file["fa"][bg], bg)
        
        # Location plots
        self.logger.info("Creating localization plots")
        motifs = pwmfile_to_motifs(self.final_pwm)
        for motif in motifs:
            m = "%s_%s" % (motif.id, motif.to_consensus())
            s = p.stats[m]
            outfile = os.path.join(self.imgdir, "%s_histogram.svg" % motif.id)
            motif_localization(self.location_fa, motif, lwidth, outfile, cutoff=s["cutoff_fdr"])
    
            s["stars"] = int(mean([star(s[x], all_stats[x]) for x in all_stats.keys()]) + 0.5)
            self.logger.debug("Motif %s: %s stars" % (m, s["stars"]))

        # Calculate enrichment of final, clustered motifs
        self.calculate_cluster_enrichment(self.final_pwm, background)

        # Create report    
        self.print_params()
        self._calc_report_values(self.final_pwm, background)
        self._create_report(self.final_pwm, background, stats=p.stats, best_id=best_id)
        self._create_text_report(self.final_pwm, background)
        self.logger.info("Open %s in your browser to see your results." % (self.motif_report))
        
        if not(params["keep_intermediate"]):
            
            self.logger.info("Deleting intermediate files. Please specifify the -k option if you want to keep these files.")
            shutil.rmtree(self.tmpdir)

        self.logger.info("Done")