def make_alignment(self, cfg, alignment):
    """Create this subset's alignment file, or validate the stored one.

    The subset alignment is built from ``alignment`` and this subset, and
    lives at ``<cfg.phyml_path>/<self.name>.phy``.  That path is recorded on
    ``self.alignment_path``.  If the file does not exist it is written; if
    it does, it is read back and compared with the freshly built alignment.

    Raises:
        SubsetError: the stored file differs from the new subset alignment.
    """
    fresh = SubsetAlignment(alignment, self)
    phy_file = os.path.join(cfg.phyml_path, self.name + '.phy')

    # Keep the location on the subset so it stays available afterwards.
    self.alignment_path = phy_file

    if not os.path.exists(phy_file):
        # Nothing stored yet: write it out and we are done.
        fresh.write(phy_file)
        return

    log.debug("Found existing alignment file %s", phy_file)
    stored = Alignment()
    stored.read(phy_file)
    if stored.same_as(fresh):
        return

    # The stored copy must match what the current configuration produces.
    log.error(
        "It looks like you have changed one or more of the "
        "data_blocks in the configuration file, "
        "so the new subset alignments "
        "don't match the ones stored for this analysis. "
        "You'll need to run the program with --force-restart")
    raise SubsetError
def make_alignment(self, source_alignment_path):
    """Load the source alignment and reconcile it with the stored copy.

    The alignment read from ``source_alignment_path`` is kept on
    ``self.alignment``.  The working copy lives at
    ``<the_config.start_tree_path>/source.phy`` (``self.alignment_path``).
    If no copy exists it is written; otherwise the copy must match the
    source both via ``same_as`` and in its multiset of species names.

    Raises:
        AnalysisError: the stored copy differs from the source alignment.
    """
    self.alignment = Alignment()
    self.alignment.read(source_alignment_path)

    # TODO REMOVE -- this should be part of the checking procedure
    self.alignment_path = os.path.join(the_config.start_tree_path,
                                       'source.phy')

    if not os.path.exists(self.alignment_path):
        # First run: start by copying the alignment.
        self.alignment.write(self.alignment_path)
        return

    previous = Alignment()
    previous.read(self.alignment_path)
    if not previous.same_as(self.alignment):
        log.error("Alignment file has changed since previous run. "
                  "You need to use the force-restart option.")
        raise AnalysisError

    # Compare species names as multisets, ignoring their order.
    species_match = (collections.Counter(previous.species)
                     == collections.Counter(self.alignment.species))
    if not species_match:
        log.error("Species names in alignment have changed since "
                  "previous run. You need to use the force-restart option.")
        raise AnalysisError
def permuted_copy(self, partition=None):
    """Return a copy of the collection with all alignment columns permuted.

    Args:
        partition: object whose ``get_membership()`` yields one group of
            record indices per cluster.  Defaults to
            ``Partition([1] * len(self))`` (presumably a single group
            holding every record -- confirm against ``Partition``).

    Returns:
        A new instance of this class whose records are the shuffled
        alignments, sorted by ``SORT_KEY`` of their names.
    """
    def take(n, iterable):
        # Use the next() builtin: the ``.next()`` method only exists on
        # Python 2 iterators, while next() works on Python 2.6+ and 3.
        return [next(iterable) for _ in range(n)]

    if partition is None:
        partition = Partition([1] * len(self))

    alignments = []
    for ix in partition.get_membership():
        concat = Concatenation(self, ix)
        sites = concat.alignment.get_sites()
        random.shuffle(sites)

        # One character iterator per sequence name, built by transposing
        # the shuffled sites back into per-sequence rows.
        rows = dict(zip(concat.alignment.get_names(),
                        [iter(row) for row in zip(*sites)]))

        # Consume each row iterator in chunks of the original lengths so the
        # concatenation is cut back into its constituent alignments.
        new_seqs = [[(name, ''.join(take(length, rows[name])))
                     for name in rows]
                    for length in concat.lengths]

        for seqs, datatype, name in zip(new_seqs, concat.datatypes,
                                        concat.names):
            alignment = Alignment(seqs, datatype)
            alignment.name = name
            alignments.append(alignment)

    return self.__class__(
        records=sorted(alignments, key=lambda x: SORT_KEY(x.name)))
def permuted_copy(self, partition=None):
    """Return a copy of the collection with all alignment columns permuted.

    Args:
        partition: object whose ``get_membership()`` yields one group of
            record indices per cluster.  When omitted,
            ``Partition([1] * len(self))`` is used (presumably one group
            containing every record -- confirm against ``Partition``).

    Returns:
        A new instance of this class holding the shuffled alignments,
        sorted by ``SORT_KEY`` applied to each alignment's name.
    """
    def take(n, iterable):
        # next(it) instead of it.next(): the method form is Python 2 only
        # and raises AttributeError on Python 3 iterators.
        return [next(iterable) for _ in range(n)]

    if partition is None:
        partition = Partition([1] * len(self))
    index_tuples = partition.get_membership()

    alignments = []
    for ix in index_tuples:
        concat = Concatenation(self, ix)
        sites = concat.alignment.get_sites()
        random.shuffle(sites)

        # Map each sequence name to an iterator over its shuffled characters.
        d = dict(
            zip(concat.alignment.get_names(),
                [iter(x) for x in zip(*sites)]))

        # Slice the shuffled concatenation back into pieces of the original
        # lengths, one list of (name, sequence) pairs per source alignment.
        new_seqs = [[(k, ''.join(take(length, d[k]))) for k in d]
                    for length in concat.lengths]

        for seqs, datatype, name in zip(new_seqs, concat.datatypes,
                                        concat.names):
            alignment = Alignment(seqs, datatype)
            alignment.name = name
            alignments.append(alignment)

    return self.__class__(
        records=sorted(alignments, key=lambda x: SORT_KEY(x.name)))
def parse_input(target_sequence: str, input_fasta_file: str) -> None:
    """Validate the inputs, find the alignments and save the results.

    Forward and reverse-complement alignments are written as JSON files
    under ``RESULTS_DIRECTORY`` and a histogram is saved for each of them.

    :param target_sequence: The target sequence as a string.
    :param input_fasta_file: The path to a FASTA file.
    :raises click.UsageError: if the target is not a valid DNA sequence, or
        if no sequence could be read from the FASTA file.
    """
    if not Helpers.valid_dna_sequence(target_sequence):
        raise click.UsageError(
            'The target sequence is not a valid DNA sequence.')

    input_sequence = Helpers.fasta_file_to_sequence(input_fasta_file)
    if not input_sequence:
        raise click.UsageError('Passed input file not in FASTA format.')

    optimal_alignment_finder = OptimalAlignmentFinder(target_sequence,
                                                      input_sequence)

    # Create the results directory if needed.  exist_ok avoids the
    # check-then-create race of testing os.path.exists first.
    os.makedirs(RESULTS_DIRECTORY, exist_ok=True)

    Alignment.save_alignments(
        f'{RESULTS_DIRECTORY}forward_alignments.json',
        optimal_alignment_finder.forward_alignments)
    Histogram.save_histograms(optimal_alignment_finder.forward_alignments,
                              True)

    Alignment.save_alignments(
        f'{RESULTS_DIRECTORY}reverse_complement_alignments.json',
        optimal_alignment_finder.reverse_complement_alignments)
    Histogram.save_histograms(
        optimal_alignment_finder.reverse_complement_alignments, False)
def make_alignment(self, cfg, alignment):
    """Ensure the on-disk alignment for this subset exists and is current.

    Builds the subset alignment from ``alignment``, records its location
    (``<cfg.phylofiles_path>/<self.name>.phy``) on ``self.alignment_path``,
    then writes the file if it is missing or checks it if it is present.

    Raises:
        SubsetError: an existing file does not match the subset alignment.
    """
    subset_aln = SubsetAlignment(alignment, self)
    target = os.path.join(cfg.phylofiles_path, self.name + '.phy')
    self.alignment_path = target  # kept on the subset for later use

    already_there = os.path.exists(target)
    if already_there:
        log.debug("Found existing alignment file %s", target)
        on_disk = Alignment()
        on_disk.read(target)
        matches = on_disk.same_as(subset_aln)
        if not matches:
            # The stored alignment must equal the one we would write now.
            log.error(
                "It looks like you have changed one or more of the "
                "data_blocks in the configuration file, "
                "so the new subset alignments "
                "don't match the ones stored for this analysis. "
                "You'll need to run the program with --force-restart")
            raise SubsetError
        return

    # No stored file yet, so write it.
    subset_aln.write(target)
def execute(self):
    """Run alignment, constraint evaluation and probabilistic inference.

    Steps, in order:
      1. run the ``Alignment`` stage on ``self.messages``;
      2. build ``self.fields`` from the fields-info file in
         ``self.output_dir``;
      3. compute the observation probabilities and save them separately for
         the request and response test types;
      4. merge the constraint results and run ``ProbabilisticInference`` on
         the ``"<fid>-<fid>"`` candidates.

    Returns:
        The value returned by ``ProbabilisticInference.execute``.
    """
    # Alignment
    # TODO: choose mode automatically
    aligner = Alignment(messages=self.messages,
                        output_dir=self.output_dir,
                        mode=self.mode,
                        multithread=self.multithread)
    aligner.execute()

    # Generate fields
    fields_info_path = os.path.join(self.output_dir,
                                    Alignment.FILENAME_FIELDS_INFO)
    self.fields, fid_list = self.generate_fields_by_fieldsinfo(
        fields_info_path)
    logging.debug("Number of keyword candidates: {}\nfid: {}".format(
        len(fid_list), fid_list))

    # Compute probabilities of observation constraints
    constraint = Constraint(messages=self.messages,
                            direction_list=self.direction_list,
                            fields=self.fields,
                            fid_list=fid_list,
                            output_dir=self.output_dir)
    probabilities, sizes = constraint.compute_observation_probabilities()
    p_request, p_response = probabilities
    size_request, size_response = sizes
    constraint.save_observation_probabilities(
        p_request, size_request, Constraint.TEST_TYPE_REQUEST)
    constraint.save_observation_probabilities(
        p_response, size_response, Constraint.TEST_TYPE_RESPONSE)

    # Probabilistic inference
    # NOTE(review): the merged values are computed but never used; the
    # inference below is fed the request-side values only -- confirm
    # whether that is intended.
    _p_all, _size_all = self.merge_constraint_results(
        p_request, p_response, size_request, size_response)

    # Only test the same fid for both sides.
    candidates = ["{0}-{0}".format(fid) for fid in fid_list]
    inference = ProbabilisticInference(pairs_p=p_request,
                                       pairs_size=size_request)
    fid_inferred = inference.execute(candidates)

    ## TODO: iterative
    ## TODO: format inference
    return fid_inferred
def get_results(self):
    """Return the job's results, loading the aligned file when required.

    When ``self.file_read_job`` is ``None`` the value already stored in
    ``self.results`` is returned unchanged.  Otherwise a new ``Alignment``
    with ``self.datatype`` is read from ``self.alignedfn`` as FASTA, stored
    in ``self.results`` and returned.
    """
    # Identity test: ``== None`` would defer to a custom __eq__.
    if self.file_read_job is None:
        return self.results

    alignment = Alignment()
    alignment.datatype = self.datatype
    alignment.read_filepath(self.alignedfn, file_format='FASTA')
    self.results = alignment
    return self.results
def __init__(self, gui, parent=None):
    """
    Connect to the main gui, create the alignment object, reset all flags
    and status variables, and start the thread.

    :param gui: main gui object
    :param parent: parent object handed to the QThread constructor
    """

    QtCore.QThread.__init__(self, parent)
    self.gui = gui

    # The alignment object (and with it the alignment points) is kept for
    # the whole program execution, even if the telescope driver or other
    # configuration parameters are changed.
    configuration = self.gui.configuration
    self.al = Alignment(configuration, debug=configuration.alignment_debug)

    self.exiting = False

    # All request flags start out cleared.
    for flag_name in ('output_channel_initialization_flag',
                      'telescope_initialization_flag',
                      'camera_initialization_flag',
                      'new_tesselation_flag',
                      'slew_to_alignment_point_flag',
                      'perform_alignment_flag',
                      'perform_autoalignment_flag',
                      'slew_to_moon_limb_flag',
                      'set_focus_area_flag',
                      'goto_focus_area_flag',
                      'slew_to_tile_and_record_flag',
                      'move_to_selected_tile_flag',
                      'escape_pressed_flag'):
        setattr(self, flag_name, False)

    # Remember the standard output descriptor: stdout might be redirected
    # to a file and back later.
    self.stdout_saved = sys.stdout

    # Status variables.
    for status_name in ('output_redirected', 'telescope_connected',
                        'camera_connected', 'tesselation_created'):
        setattr(self, status_name, False)

    # Remaining instance variables.
    self.active_tile_number = -1
    self.all_tiles_recorded = False
    for unset_name in ('protocol_file', 'telescope', 'camera', 'date_time',
                       'me', 'tc', 'repeat_from_here',
                       'tile_indices_since_last_autoalign'):
        setattr(self, unset_name, None)

    self.start()
def read_internal_alignment(fn, file_format="FASTA", datatype=None,
                            dirs_to_delete=(), temp_fs=None):
    """Read an alignment file and optionally remove temporary directories.

    Args:
        fn: path of the alignment file to read.
        file_format: format name passed on to ``Alignment.read_filepath``.
        datatype: value assigned to the alignment's ``datatype`` before
            reading.
        dirs_to_delete: directories removed through ``temp_fs.remove_dir``
            once the alignment has been read successfully.
        temp_fs: object providing ``remove_dir``; required (asserted) when
            ``dirs_to_delete`` is non-empty.

    Returns:
        The populated ``Alignment``.

    Raises:
        ValueError: the file produced an alignment with no sequences.
    """
    alignment = Alignment()
    alignment.datatype = datatype
    alignment.read_filepath(fn, file_format=file_format)

    if len(alignment) < 1:
        # The message needs a %s placeholder: without one the % operator
        # raised TypeError ("not all arguments converted") and masked the
        # intended ValueError.
        raise ValueError(
            "The alignment file %s has no sequences. SATe quits." % (fn,))

    if dirs_to_delete:
        assert temp_fs
        for d in dirs_to_delete:
            time.sleep(1)  # TODO: not sure why this is here!
            temp_fs.remove_dir(d)
    return alignment
def make_alignment(self, source_alignment_path):
    """Load the source alignment and keep a checked copy of it.

    The alignment read from ``source_alignment_path`` is stored on
    ``self.alignment``.  Its copy lives at
    ``<self.cfg.start_tree_path>/source.phy`` (``self.alignment_path``):
    written when missing, compared against the source when present.

    Raises:
        AnalysisError: the existing copy differs from the source alignment.
    """
    self.alignment = Alignment()
    self.alignment.read(source_alignment_path)

    # We start by copying the alignment.
    copy_path = os.path.join(self.cfg.start_tree_path, 'source.phy')
    self.alignment_path = copy_path

    if not os.path.exists(copy_path):
        self.alignment.write(copy_path)
        return

    # A copy from a previous run exists: make sure it is the same.
    previous = Alignment()
    previous.read(copy_path)
    if previous.same_as(self.alignment):
        return

    log.error("Alignment file has changed since previous run. You need to use the force-restart option.")
    raise AnalysisError
def build_expr(self, context, expr, filter=None, align=None):
    """Build the expression for a logit-scored value.

    ``expr`` is wrapped in ``LogitScore``.  With ``align`` given the score
    is returned wrapped in ``Alignment``; otherwise the comparison
    ``score > 0.5`` is built and passed through ``self.add_filter``.

    ``context`` is accepted but not used here.
    """
    score_expr = LogitScore(expr)

    if align is None:
        threshold_test = ComparisonOp('>', score_expr, 0.5)
        return self.add_filter(threshold_test, filter)

    # add_filter is not needed on this path: Alignment already handles the
    # filter it is given.
    return Alignment(score_expr, align, filter=filter)
def generate_xml_tree(self):
    """Parse ``self.file`` as XML and build the alignment objects.

    The parsed root is stored on ``self.root``, and the elements picked by
    fixed child positions on ``self.blast_output``, ``self.iteration`` and
    ``self.iteration_hit``.  Every child of ``self.iteration_hit`` is
    appended to ``self.hits``.  For each hit one ``Alignment`` is created
    per element of the hit's sixth child, and a ``MainAlignment`` holding
    them is appended to ``self.main_alignments``.

    The fixed positions presumably follow the BLAST XML layout (for example
    child 10 / child 13 of an HSP being identity / alignment length) --
    confirm against the files this is used on.

    If an expected child position is missing (``IndexError``) an error is
    logged and the method returns; the attributes may be partially filled.

    :return: None
    """
    import logging

    try:
        tree = et.parse(self.file)
        self.root = tree.getroot()
        self.blast_output = self.root[8]
        self.iteration = self.blast_output[0]
        self.iteration_hit = self.iteration[4]
        self.hits.extend(self.iteration_hit)

        for hit in self.hits:
            hit_fields = list(hit)
            for hsp in hit_fields[5]:
                # Force float division so the percentage is not truncated
                # to 0 by integer division on Python 2.
                ratio = float(int(hsp[10].text)) / int(hsp[13].text) * 100
                procent = float("{0:.2f}".format(ratio))
                self.aligns.append(
                    Alignment(hit_fields[2].text, hsp[1].text, procent,
                              hsp[12].text, hsp[10].text, hsp[13].text,
                              hsp[14].text, hsp[15].text, hsp[16].text))
            self.main_alignments.append(
                MainAlignment(hit[2].text, self.aligns))
            # Start a fresh list so the next hit does not share this one.
            self.aligns = []
    except IndexError:
        # Previously a bare string expression, which did nothing at all.
        logging.getLogger(__name__).error("Bad file.")
def __init__(self, line_string):
    """Parse one line into either a header or an alignment record.

    Lines starting with ``'@'`` are headers and are kept verbatim as the
    single entry of ``self.fields``.  Any other line is split on
    whitespace; fields 3 and 5 are taken as position and CIGAR, and the
    field starting with ``'MD:Z:'`` supplies the MD string for
    ``self.alignment``.

    Raises:
        CigarUnavailableError: the CIGAR field is ``'*'``.
        StopIteration: no field starts with ``'MD:Z:'``.
    """
    if line_string.startswith('@'):
        self.type = self.TYPE_HEADER
    else:
        self.type = self.TYPE_ALIGNMENT

    if self.type == self.TYPE_HEADER:
        self.fields = [line_string]
        return

    self.fields = line_string.split()
    pos, cigar = self.fields[3], self.fields[5]
    if cigar == '*':
        raise CigarUnavailableError

    # First field carrying the MD tag, with the tag prefix stripped.
    md_field = next(field for field in self.fields
                    if field.startswith('MD:Z:'))
    md = md_field.replace('MD:Z:', '')
    self.alignment = Alignment(pos, cigar, md)
def find_single_unique(alns, bam, debug=False):
    """Extracts single unique alignment for indel detection

    If there is only one alignment reported by BWA-mem even when '-a' is
    turned on

    The single primary alignment (mapped and not secondary) is rejected when
    its mapq is not above 0, when less than 95% of the read is covered by
    CIGAR operations 0/1 (match/insertion), or when the effective edit
    distance exceeds 10% of its inferred length.

    Args:
        alns: (list) Pysam AlignedRead objects of the same contig
        bam: Pysam bam handle
        debug: (Boolean) write the reason for a rejection to stdout
    Returns:
        Alignment object or None
    """
    primary_alns = [
        aln for aln in alns if not aln.is_unmapped and not aln.is_secondary
    ]
    if len(primary_alns) != 1:
        return None

    # Every check and message below uses the primary alignment.  The code
    # used to mix in alns[0], which may be an unmapped or secondary read.
    primary = primary_alns[0]

    if not primary.mapq > 0:
        if debug:
            sys.stdout.write(
                'filter out single uniq alignment %s: mapq = 0\n' %
                primary.qname)
        return None

    matched_and_insertion_len = sum(
        a[1] for a in primary.cigar if a[0] <= 1)
    if float(matched_and_insertion_len) / float(primary.rlen) < 0.95:
        if debug:
            sys.stdout.write(
                'best alignment less than 0.95 mapped:%s %s\n' %
                (primary.qname, primary.cigarstring))
        return None

    edit_distance = effective_edit_distance(primary)
    if edit_distance is not None:
        edit_fraction = float(edit_distance) / float(primary.inferred_length)
        if edit_fraction > 0.1:
            if debug:
                sys.stdout.write(
                    'filter out single uniq alignment %s: edit distance %s - > 0.1 of contig len %d (%.01f)\n'
                    % (primary.qname, edit_distance,
                       primary.inferred_length, edit_fraction))
            return None

    return Alignment.from_alignedRead(primary, bam)
def alignment(self):
    """Build an Alignment from this sequence list and return it.

    A SequenceList whose sequences all share the same length and type can
    be treated as an Alignment.  This method creates an Alignment carrying
    this object's ``fName`` and a deep copy of its sequences, runs the
    Alignment method checkLengthsAndTypes() on it, and returns it.

    When p4 is fed a fasta sequence it makes a SequenceList object and runs
    this method on it.  If that works, p4 puts the Alignment object in
    var.alignments; if not, it puts the SequenceList object in
    var.sequenceLists.

    p4 might take some short sequences for DNA when they are really
    protein.  The types check then fails and no alignment is made.  In
    that case you can do something like this::

        sl = var.sequenceLists[0]
        for s in sl.sequences:
            s.dataType = 'protein'
        a = sl.alignment()
    """
    import copy
    from alignment import Alignment

    new_alignment = Alignment()
    new_alignment.fName = self.fName
    # Deep-copied because self will be deleted.
    new_alignment.sequences = copy.deepcopy(self.sequences)
    new_alignment.checkLengthsAndTypes()
    return new_alignment
def make_alignment(self, source_alignment_path):
    """Read the source alignment and verify or create its working copy.

    ``self.alignment`` is read from ``source_alignment_path`` and
    ``self.alignment_path`` is set to
    ``<the_config.start_tree_path>/source.phy``.  A missing working copy is
    written.  An existing one must match the source via ``same_as`` and
    must carry the same species names, counted as a multiset.

    Raises:
        AnalysisError: the alignment or its species names changed since the
            previous run.
    """
    source = Alignment()
    self.alignment = source
    source.read(source_alignment_path)

    # TODO REMOVE -- this should be part of the checking procedure
    # We start by copying the alignment
    stored_path = os.path.join(the_config.start_tree_path, 'source.phy')
    self.alignment_path = stored_path

    if os.path.exists(stored_path):
        # Make sure it is the same
        stored = Alignment()
        stored.read(stored_path)

        if not stored.same_as(self.alignment):
            log.error("Alignment file has changed since previous run. You need to use the force-restart option.")
            raise AnalysisError

        stored_species = collections.Counter(stored.species)
        current_species = collections.Counter(self.alignment.species)
        if not stored_species == current_species:
            log.error("Species names in alignment have changed since previous run. You need to use the force-restart option.")
            raise AnalysisError
    else:
        self.alignment.write(stored_path)
def main():
    """Collect the command-line arguments and run the alignment."""
    # vars() turns the parsed arguments into a dictionary.
    cli_options = vars(get_args())
    run_options = args_init(cli_options, align=True)

    ## run alignment
    Alignment(**run_options).run()
def read_internal_alignment(fn,
                            file_format='FASTA',
                            datatype=None,
                            dirs_to_delete=(),
                            temp_fs=None):
    """Read an alignment from ``fn``, then clean up temporary directories.

    ``datatype`` is set on the new ``Alignment`` before it is read in
    ``file_format``.  On success every directory in ``dirs_to_delete`` is
    removed through ``temp_fs.remove_dir`` (``temp_fs`` is asserted to be
    set in that case) and the alignment is returned.

    Raises:
        ValueError: the alignment read from ``fn`` has no sequences.
    """
    loaded = Alignment()
    loaded.datatype = datatype
    loaded.read_filepath(fn, file_format=file_format)

    if not len(loaded) >= 1:
        raise ValueError(
            "The alignment file %s has no sequences. PASTA quits." % fn)

    if dirs_to_delete:
        assert (temp_fs)
        for stale_dir in dirs_to_delete:
            time.sleep(.1)  # TODO: not sure why this is here!
            temp_fs.remove_dir(stale_dir)
    return loaded
def find_chimera(alns, bam, debug=False, check_haplotype=True):
    """Determine if given alignments are chimeric

    Alignments whose CIGAR string contains clipping (H or S) and that are
    not flagged secondary are treated as primary; all others as secondary.
    More than one primary alignment is needed for a chimera.

    Args:
        alns: (List) List of Pysam AlignedRead objects. ``cigarstring`` is
              passed to ``re.search``, so it must be a string.
        bam: (AlignmentFile) Pysam handle to BAM file - for getting
             reference info
        debug: (Boolean) debug mode - will output debugging statements
        check_haplotype: (Boolean) whether to screen out alignments to
                         references containing '_'
    Returns:
        Tuple (primary Alignment objects, valid secondary Alignment
        objects), or (None, None) when there are fewer than two primary
        alignments or any primary alignment is invalid.
    """
    primary_alns = []
    secondary_alns = []
    for aln in alns:
        if re.search('[HS]', aln.cigarstring) and not aln.is_secondary:
            primary_alns.append(aln)
        else:
            secondary_alns.append(aln)

    if check_haplotype and len(primary_alns) > 1:
        replace_haplotype(primary_alns, secondary_alns, bam)

    # Checked only after replace_haplotype has been given both lists.
    if len(primary_alns) <= 1:
        return None, None

    aligns = [Alignment.from_alignedRead(aln, bam) for aln in primary_alns]
    bad_aligns = [align for align in aligns if not align.is_valid()]
    if bad_aligns:
        if debug:
            for align in bad_aligns:
                # Trailing newline added so that consecutive messages do
                # not run together on a single line.
                sys.stdout.write('bad alignment %s %s %s %s %s %s\n' %
                                 (align.query, align.qstart, align.qend,
                                  align.target, align.tstart, align.tend))
        return None, None

    secondary_aligns = [
        Alignment.from_alignedRead(aln, bam) for aln in secondary_alns
    ]
    valid_secondary_aligns = [
        align for align in secondary_aligns if align.is_valid()
    ]
    return aligns, valid_secondary_aligns
def find_chimera(alns, bam, debug=False, check_haplotype=True):
    """Determine if given alignments are chimeric

    An alignment counts as primary here when its CIGAR string contains
    clipping (H or S) and it is not flagged secondary; everything else is
    secondary.  A chimera needs more than one primary alignment.

    Args:
        alns: (List) List of Pysam AlignedRead objects. ``cigarstring`` is
              handed to ``re.search`` and therefore must be a string.
        bam: (AlignmentFile) Pysam handle to BAM file - for getting
             reference info
        debug: (Boolean) debug mode - will output debugging statements
        check_haplotype: (Boolean) whether to screen out alignments to
                         references containing '_'
    Returns:
        (aligns, valid_secondary_aligns) as lists of Alignment objects, or
        (None, None) if fewer than two primary alignments remain or one of
        them is not valid.
    """
    primary_alns = []
    secondary_alns = []
    for aln in alns:
        is_clipped = re.search('[HS]', aln.cigarstring)
        if is_clipped and not aln.is_secondary:
            primary_alns.append(aln)
        else:
            secondary_alns.append(aln)

    if check_haplotype and len(primary_alns) > 1:
        replace_haplotype(primary_alns, secondary_alns, bam)

    if len(primary_alns) > 1:
        aligns = [Alignment.from_alignedRead(aln, bam)
                  for aln in primary_alns]
        bad_aligns = [align for align in aligns if not align.is_valid()]
        if not bad_aligns:
            valid_secondary_aligns = [
                align
                for align in (Alignment.from_alignedRead(aln, bam)
                              for aln in secondary_alns)
                if align.is_valid()
            ]
            return aligns, valid_secondary_aligns

        if debug:
            for align in bad_aligns:
                # Terminated with a newline so that each bad alignment is
                # reported on its own line.
                sys.stdout.write('bad alignment %s %s %s %s %s %s\n' %
                                 (align.query, align.qstart, align.qend,
                                  align.target, align.tstart, align.tend))

    return None, None
def solveAlignment(method, fileName):
    """Read an alignment problem from ``fileName`` and solve it.

    ``method`` selects the solver: ``'1'`` for brute force, ``'2'`` for the
    dynamic solver.  The solution is printed through the matching print
    method of the ``Alignment`` object.  Any other value is reported
    through ``error``.
    """
    alignment = Alignment(fileName)  # create the object for the file
    alignment.readFile()  # read the information in the file

    if method == '1':
        # Brute force: solve, then print the results.
        result, result1, result2 = alignment.bruteForceSolving()
        alignment.printBruteForce(result, result1, result2)
        return

    if method == '2':
        # Dynamic solver: solve, then print the results.
        solution = alignment.dynamicSolving()
        matrix, moves, result, result1, result2 = solution
        alignment.printDynamic(matrix, moves, result, result1, result2)
        return

    error(
        "Error, revise que utilice los parametros correctos. \n Utilize [-h] para ayuda."
    )
def make_alignment(self, cfg, alignment):
    """Write this subset's alignment file, or check the one already there.

    The file is ``<cfg.phylofiles_path>/<self.name>.phy``; its path is kept
    on ``self.alignment_path``.  An existing file is read and compared with
    the subset alignment built from ``alignment``.

    Raises:
        SubsetError: the existing file differs from the subset alignment
            (``self.FORCE_RESTART_MESSAGE`` is logged first).
    """
    subset_alignment = SubsetAlignment(alignment, self)
    destination = os.path.join(cfg.phylofiles_path, self.name + '.phy')

    # Stored on the subset so that we keep it around.
    self.alignment_path = destination

    if not os.path.exists(destination):
        # Not there yet: we need to write it.
        subset_alignment.write(destination)
        return

    log.debug("Found existing alignment file %s", destination)
    existing = Alignment()
    existing.read(destination)
    if existing.same_as(subset_alignment):
        return

    # It had better be the same, and it is not.
    log.error(self.FORCE_RESTART_MESSAGE)
    raise SubsetError
def calcUnconstrainedLogLikelihood1(self):
    """Calculate likelihood under the multinomial model.

    The unconstrained (multinomial) log likelihood is calculated without
    regard to character partitions, and the result is stored in the data
    variable unconstrainedLogLikelihood.

    With more than one partition, a temporary alignment is made that holds
    all the sequences in a single part, so the calculation ultimately works
    on one data partition only.

    With more than one alignment there may be more than one datatype, so
    this method refuses to run and raises P4Error.

    Note that the unconstrained log like of the combined data is not the
    sum of the unconstrained log likes of the separate partitions.

    See also calcUnconstrainedLogLikelihood2
    """
    if len(self.alignments) > 1:
        gm = ["Data.calcUnconstrainedLogLikelihood()",
              "This method is not implemented for more than one alignment."]
        raise P4Error(gm)

    if self.nParts == 1:  # no problem
        self.unconstrainedLogLikelihood = pf.getUnconstrainedLogLike(
            self.parts[0].cPart)
        return

    import copy

    # Several parts: rebuild the data as one temporary alignment.
    original = self.alignments[0]
    combined = Alignment()
    combined.dataType = original.dataType
    combined.symbols = original.symbols
    combined.dim = original.dim
    combined.equates = original.equates
    combined.taxNames = original.taxNames
    combined.sequences.extend(
        copy.deepcopy(seq) for seq in original.sequences)
    combined.checkLengthsAndTypes()
    combined._initParts()

    self.unconstrainedLogLikelihood = pf.getUnconstrainedLogLike(
        combined.parts[0].cPart)
def simulate(self, partition, outdir, batchsize=1, **kwargs):
    """
    Simulate a set of alignments from the parameters inferred on a partition

    One argument tuple is assembled per record from the cached likelihood
    results, the simulation tasks are run (sequentially when no client is
    available, otherwise in parallel), and each simulated alignment is
    gap-masked with the original sequences and written to
    ``<outdir>/<record name>.phy`` in phylip format.

    :param partition: partition whose ``get_membership()`` gives the index
        groups to look up in ``self.lnl_cache``
    :param outdir: directory receiving the simulated alignment files
    :param batchsize: batch size handed to ``parallel_map``
    :param kwargs: forwarded unchanged to ``add_lnl_partitions``; the
        optional ``background`` entry (default False) is also read here
    :return: the ``parallel_map`` result when running in the background,
        otherwise None
    """
    # ``background`` used to be read without ever being defined in this
    # function, which raised NameError.  Take it from kwargs so the
    # signature and the forwarding to add_lnl_partitions stay unchanged.
    background = kwargs.get('background', False)

    indices = partition.get_membership()
    self.add_lnl_partitions(partition, **kwargs)
    results = [self.lnl_cache[ix] for ix in indices]
    places = dict((j, i) for (i, j) in enumerate(
        rec.name for rec in self.collection.records))

    # Collect argument list, positioned like the records in the collection.
    args = [None] * len(self.collection)
    for result in results:
        # ``part`` rather than ``partition`` so the parameter is not
        # shadowed.
        for part in result['partitions'].values():
            place = places[part['name']]
            args[place] = (len(self.collection[place]),
                           model_translate(part['model']),
                           part['frequencies'],
                           part['alpha'],
                           result['ml_tree'],
                           part['rates'] if 'rates' in part else None)

    # Distribute work
    msg = 'Simulating'
    client = get_client()
    if client is None:
        map_result = sequential_map(client, tasks.simulate_task, args, msg)
    else:
        map_result = parallel_map(client, tasks.simulate_task, args, msg,
                                  batchsize, background)
        # NOTE(review): the early return is placed on the parallel path;
        # the original nesting was ambiguous -- confirm.
        if background:
            return map_result

    # Process results
    for i, result in enumerate(map_result):
        orig = self.collection[i]
        simseqs = gapmask(result, orig.get_sequences())
        al = Alignment(simseqs, 'protein' if orig.is_protein() else 'dna')
        outfile = os.path.join(outdir, orig.name + '.phy')
        al.write_alignment(outfile, 'phylip', True)
def make_alignment(self, cfg, alignment):
    """Store the alignment of this subset on disk, or verify the stored one.

    The alignment file is ``<cfg.phylofiles_path>/<self.subset_id>.phy`` and
    its path is remembered on ``self.alignment_path``.  If the file exists
    it is read and compared with the subset alignment built from
    ``alignment``; if not, the subset alignment is written there.

    Raises:
        SubsetError: the stored file does not match
            (``self.FORCE_RESTART_MESSAGE`` is logged first).
    """
    wanted = SubsetAlignment(alignment, self)
    phy_path = os.path.join(cfg.phylofiles_path, self.subset_id + '.phy')
    self.alignment_path = phy_path  # kept so that we have it around

    if os.path.exists(phy_path):
        log.debug("Found existing alignment file %s" % phy_path)
        found = Alignment()
        found.read(phy_path)
        if found.same_as(wanted):
            return
        # It had better be the same!
        log.error(self.FORCE_RESTART_MESSAGE)
        raise SubsetError

    # We need to write it
    wanted.write(phy_path)
def simulate(self, partition, outdir, batchsize=1, **kwargs):
    """
    Simulate a set of alignments from the parameters inferred on a partition

    Per record, an argument tuple is built from the cached likelihood
    results; the simulation tasks then run sequentially (no client) or in
    parallel.  Each result is gap-masked against the original sequences and
    written as ``<outdir>/<record name>.phy`` in phylip format.

    :param partition: partition whose ``get_membership()`` gives the index
        groups to look up in ``self.lnl_cache``
    :param outdir: output directory for the simulated alignments
    :param batchsize: batch size passed to ``parallel_map``
    :param kwargs: passed on unchanged to ``add_lnl_partitions``; an
        optional ``background`` entry (default False) is read here as well
    :return: the ``parallel_map`` result when running in the background,
        otherwise None
    """
    # The name ``background`` was used below without being defined anywhere
    # in this function (NameError).  It is now read from kwargs, which keeps
    # the call signature and the forwarded kwargs exactly as before.
    background = kwargs.get('background', False)

    indices = partition.get_membership()
    self.add_lnl_partitions(partition, **kwargs)
    results = [self.lnl_cache[ix] for ix in indices]

    # Record name -> position in the collection.
    places = dict((name, position) for (position, name) in enumerate(
        rec.name for rec in self.collection.records))

    # Collect argument list
    args = [None] * len(self.collection)
    for result in results:
        # The loop variable no longer reuses the name of the ``partition``
        # parameter.
        for part in result['partitions'].values():
            place = places[part['name']]
            rates = part['rates'] if 'rates' in part else None
            args[place] = (len(self.collection[place]),
                           model_translate(part['model']),
                           part['frequencies'],
                           part['alpha'],
                           result['ml_tree'],
                           rates)

    # Distribute work
    msg = 'Simulating'
    client = get_client()
    if client is None:
        map_result = sequential_map(client, tasks.simulate_task, args, msg)
    else:
        map_result = parallel_map(client, tasks.simulate_task, args, msg,
                                  batchsize, background)
        # NOTE(review): early return kept on the parallel path; the nesting
        # in the original was ambiguous -- confirm.
        if background:
            return map_result

    # Process results
    for i, result in enumerate(map_result):
        orig = self.collection[i]
        simseqs = gapmask(result, orig.get_sequences())
        al = Alignment(simseqs, 'protein' if orig.is_protein() else 'dna')
        outfile = os.path.join(outdir, orig.name + '.phy')
        al.write_alignment(outfile, 'phylip', True)
def map_aligns(self, bam, query_fasta, genome_fasta,
               accessory_known_features=None, find_events=True, max_diff=1):
    """Map every query's alignments onto transcripts.

    Reads are taken from ``bam.fetch(until_eof=True)`` and grouped by
    consecutive ``query_name``.  For each mapped alignment with a canonical
    target and blocks, the best transcript mapping is picked, and the
    mapping, its junctions and (optionally) novel-junction events are
    collected.

    Args:
        bam: Pysam BAM handle providing the alignments.
        query_fasta: handle whose ``fetch(query)`` returns the query
            sequence.
        genome_fasta: accepted but not used here.
            NOTE(review): ``self.genome_fasta`` is what gets passed to
            ``find_novel_junctions`` -- confirm this is intended.
        accessory_known_features: passed to ``find_novel_junctions``.
        find_events: whether to look for novel junction events.
        max_diff: passed to ``find_novel_junctions``.

    Returns:
        Tuple ``(mappings, junc_adjs, events)``: a defaultdict from query
        name to a list of ``(gene, transcript id, overlap)`` tuples, the
        collected junctions, and the collected events.
    """
    mappings = defaultdict(list)
    junc_adjs = []
    events = []

    for query, group in groupby(bam.fetch(until_eof=True),
                                lambda aln: aln.query_name):
        # Function-call form: the Python 2 print statement is a
        # SyntaxError on Python 3.  A single parenthesised string prints
        # identically on both.
        print('processing %s' % (query,))

        aligns = [Alignment.from_alignedRead(aln, bam)
                  for aln in group if not aln.is_unmapped]
        if not aligns:
            continue

        query_seq = query_fasta.fetch(query)
        for align in aligns:
            if not align.has_canonical_target() or align.blocks is None:
                continue

            block_matches = self.map_align(align)
            if not block_matches:
                continue

            tid = self.pick_best_mapping(block_matches, align)
            if tid is None:
                continue

            transcript = self.transcripts_dict[tid]
            olap = self.overlap(align, transcript)
            mappings[query].append((transcript.gene, transcript.id, olap))
            junc_adjs.extend(
                self.collect_junctions(align, transcript,
                                       block_matches[tid]))

            if find_events:
                events.extend(
                    find_novel_junctions(
                        block_matches[tid],
                        align,
                        transcript,
                        query_seq,
                        self.genome_fasta,
                        accessory_known_features=accessory_known_features,
                        max_diff=max_diff))

    return mappings, junc_adjs, events
def te_aligner(fq1_files, smp_name, args, fq2_files=None):
    """Map reads for the transposon part of the project.

    The project is initialised from ``args['path_out']`` and the run writes
    to the 'mapping' entry of the project's 'transposon' paths.  The
    alignment runs with ``align_to_te`` and ``small_genome`` set to True.

    ``args`` is modified in place: 'extra_index', 'fq1', 'fq2', 'path_out',
    'smp_name' and 'align_to_te' keep their new values.  Only
    'small_genome' is restored to the caller's value.

    Args:
        fq1_files: read-1 fastq files.
        smp_name: sample name.
        args: dict, the arguments of the pipeline.
        fq2_files: optional read-2 fastq files.

    Returns:
        Flat list of the items in the lists returned by
        ``Alignment(**args).run()``.
    """
    project_path = init_rnaseq_project(args['path_out'], analysis_type=1)
    te_align_path = project_path['transposon']

    args['extra_index'] = None  # pre-build

    # QC report is skipped here; it is run in gene_aligner.

    ## update args
    args['fq1'] = fq1_files
    args['fq2'] = fq2_files
    args['path_out'] = te_align_path['mapping']
    args['smp_name'] = smp_name
    args['align_to_te'] = True

    # extra small genome
    small_genome = args['small_genome']
    args['small_genome'] = True

    ## run alignment
    try:
        map_bam_list = Alignment(**args).run()
    finally:
        # Restore the caller's setting even when the run raises, so a
        # failed run does not leave args['small_genome'] forced to True.
        args['small_genome'] = small_genome

    return [item for sublist in map_bam_list for item in sublist]
def find_single_unique(alns, bam, debug=False):
    """Extracts single unique alignment for indel detection

    If there is only one alignment reported by BWA-mem even when '-a' is
    turned on

    Exactly one mapped, non-secondary alignment must be present.  It is
    filtered out when its mapq is not above 0, when CIGAR operations 0/1
    (match/insertion) cover less than 95% of the read, or when the
    effective edit distance is more than 10% of its inferred length.

    Args:
        alns: (list) Pysam AlignedRead objects of the same contig
        bam: Pysam bam handle
        debug: (Boolean) report the reason for filtering on stdout
    Returns:
        Alignment object or None
    """
    primary_alns = [aln for aln in alns
                    if not aln.is_unmapped and not aln.is_secondary]
    if len(primary_alns) != 1:
        return None

    # Use the one primary alignment for every test and message.  alns[0]
    # was used in places before, and it is not guaranteed to be the
    # primary: it may be unmapped or secondary.
    best = primary_alns[0]

    if best.mapq > 0:
        matched_and_insertion_len = sum(a[1] for a in best.cigar
                                        if a[0] <= 1)
        if float(matched_and_insertion_len) / float(best.rlen) < 0.95:
            if debug:
                sys.stdout.write(
                    'best alignment less than 0.95 mapped:%s %s\n' %
                    (best.qname, best.cigarstring))
            return None

        edit_distance = effective_edit_distance(best)
        if (edit_distance is not None and
                float(edit_distance) / float(best.inferred_length) > 0.1):
            if debug:
                sys.stdout.write(
                    'filter out single uniq alignment %s: edit distance %s - > 0.1 of contig len %d (%.01f)\n'
                    % (best.qname, edit_distance, best.inferred_length,
                       float(edit_distance) / float(best.inferred_length)))
            return None
    else:
        if debug:
            sys.stdout.write(
                'filter out single uniq alignment %s: mapq = 0\n' %
                best.qname)
        return None

    return Alignment.from_alignedRead(best, bam)
def gene_aligner(fq1_files, smp_name, args, fq2_files=None):
    """Map reads for the gene part of the project.

    The project is initialised from ``args['path_out']``.  The run is
    configured by updating ``args`` in place ('fq1', 'fq2', 'path_out',
    'smp_name', and 'align_to_te' set to False), and the alignment is run.

    Args:
        fq1_files: read-1 fastq files.
        smp_name: sample name.
        args: dict, the arguments of the pipeline.
        fq2_files: optional read-2 fastq files.

    Returns:
        The BAM paths produced by the run whose names end with
        ``map_<args['genome']>.bam``.
    """
    project_path = init_rnaseq_project(args['path_out'], analysis_type=1)
    gene_align_path = project_path['gene']

    ## qc-report
    # The path is still computed, but the QC reporter call is disabled.
    qc_path = os.path.join(gene_align_path['report'], 'qc')

    ## update args
    for key, value in (('fq1', fq1_files),
                       ('fq2', fq2_files),
                       ('path_out', gene_align_path['mapping']),
                       ('smp_name', smp_name),
                       ('align_to_te', False)):
        args[key] = value

    ## run alignment
    map_bam_list = Alignment(**args).run()

    ## keep only the BAM files mapped to the genome
    return [bam
            for bam_group in map_bam_list
            for bam in bam_group
            if bam.endswith('map_' + args['genome'] + '.bam')]
def em_step(self, iteration):
    """Run one EM iteration over the aligned corpus.

    For every alignment read from the three input files a hypergraph is
    extracted, expected counts are accumulated, and a Viterbi tree is
    written into the ``iter_NNN`` directory created under
    ``self.outputdir``.  Sentences without a hypergraph are skipped.  After
    the pass the counter is normalized with ``normalize_vbdp``.

    Args:
        iteration: zero-based iteration index; shown and used in the
            directory name as ``iteration + 1``.
    """
    likelihood = 0

    # The input files stay open for the whole pass, since the reader may
    # consume them lazily, and are closed even if the pass fails.
    # Previously they were never closed.
    with open(self.ffilename) as ffile, \
            open(self.efilename) as efile, \
            open(self.afilename) as afile:
        alignments = Alignment.reader_pharaoh(ffile, efile, afile)

        dirname = os.path.join(self.outputdir,
                               'iter_%s' % str(iteration + 1).rjust(3, '0'))
        os.mkdir(dirname)
        if logger.level >= 1:
            logger.writeln('\niteration %s' % (iteration + 1))

        starttime = time.time()
        for i, alignment in enumerate(alignments, 1):
            if i % FLAGS.emtrain_log_interval == 0:
                # The timer restarts at every report, so the elapsed time
                # covers one interval and is divided by the interval
                # length.  It used to be divided by the running total i.
                logger.writeln(
                    '%s sentences at %s secs/sent' %
                    (i, (time.time() - starttime) /
                     FLAGS.emtrain_log_interval))
                starttime = time.time()

            extractor = Extractor(
                maxabslen=100000,
                maxlen=10000,
                minhole=1,
                maxvars=100000,
                lexical_weighter=self.lexical_weighter,
                forbid_adjacent=self.forbid_adjacent,
                maximize_derivation=self.maximize_derivation,
                require_aligned_terminal=self.require_aligned_terminal)
            hg = extractor.extract_hypergraph(alignment)
            if hg is None:
                continue

            # compute expected counts
            self.compute_expected_counts(hg)
            likelihood += hg.root.inside

            treefilename = os.path.join(dirname,
                                        'tree_%s' % str(i).rjust(8, '0'))
            self.write_viterbi_tree(hg, treefilename)

    if logger.level >= 1:
        logger.writeln('likelihood: %s' % likelihood)
    if logger.level >= 1:
        logger.writeln('normalizing...')
    self.counter.normalize_vbdp(self.alpha, self.threshold)
    if logger.level >= 1:
        logger.writeln('prob table size: %s' % len(self.counter.prob))
def process_calc(bAuthenticate):
    """Run the calculation for every alignment of the selected project.

    Connects to MongoDB and, if ``bAuthenticate`` is set, authenticates
    against the database.  It then looks up the project by its project
    code and walks its domains, their alignment sets and their alignments
    (sorted by "PK"), calling ``perform_calc`` on each alignment while a
    progress line is written to stdout.

    Args:
        bAuthenticate: whether to authenticate before querying.
    """
    # connect to MongoDB
    client = MongoClient()
    db = client[database]

    # DB authentication if required
    logged_in = (db.authenticate(username, password, source=source_database)
                 if bAuthenticate else True)
    if not logged_in:
        logger.error("Authentication failed")
        return
    logger.info("Authenticated")

    project_doc = db.Project.find_one(
        {"project_code": "MFW001_0-010 Metro Paris-Ligne 15_T2A"})
    if not project_doc:
        return

    logger.info("Project %s found", project_doc["project_name"])
    p = Project(db, project_doc)
    p.load()

    for domain_doc in Domain.find(db, {"project_id": p._id}):
        d = Domain(db, domain_doc)
        d.load()

        for set_doc in db.AlignmentSet.find({"domain_id": d._id}):
            a_set = AlignmentSet(db, set_doc)
            a_set.load()

            als = Alignment.find(
                db, {"alignment_set_id": a_set._id}).sort("PK", 1)
            cnt_tot = als.count()
            for done, al in enumerate(als, 1):
                a = Alignment(db, al)
                a.setProject(p.item)
                a.load()

                # Progress on a single line, rewritten through "\r".
                sys.stdout.write(
                    "\r{:5s} pk= {:.0f} progress= {:.0%}".format(
                        a_set.item["code"], a.item["PK"],
                        float(done) / cnt_tot))
                sys.stdout.flush()

                a.perform_calc(str(datetime.now()))
def map_aligns(self, bam, query_fasta, genome_fasta, accessory_known_features=None, find_events=True, max_diff=1):
    """Map every aligned query in ``bam`` to its best transcript.

    Records from ``bam.fetch(until_eof=True)`` are grouped by consecutive
    ``query_name``. Each mapped record is converted with
    ``Alignment.from_alignedRead``; alignments without a canonical target or
    without blocks are skipped. For the rest, the best transcript is chosen
    with ``self.pick_best_mapping``.

    Args:
        bam: object whose ``fetch(until_eof=True)`` yields alignment records.
        query_fasta: object whose ``fetch(name)`` returns the query sequence.
        genome_fasta: accepted for interface compatibility but not used.
            NOTE(review): ``self.genome_fasta`` is what is passed on to
            ``find_novel_junctions`` -- confirm which one is intended.
        accessory_known_features: forwarded to ``find_novel_junctions``.
        find_events: when true, collect novel-junction events.
        max_diff: forwarded to ``find_novel_junctions``.

    Returns:
        Tuple ``(mappings, junc_adjs, events)``: ``mappings`` is a
        ``defaultdict(list)`` from query name to
        ``(gene, transcript id, overlap)`` tuples; the other two are lists.
    """
    mappings = defaultdict(list)
    junc_adjs = []
    events = []
    records = bam.fetch(until_eof=True)
    for query, group in groupby(records, lambda aln: aln.query_name):
        # Single-argument form prints the same text on Python 2 and 3.
        print('processing %s' % query)
        # The group is consumed here, before groupby advances, so no
        # intermediate list copy is needed.
        aligns = [Alignment.from_alignedRead(aln, bam)
                  for aln in group if not aln.is_unmapped]
        if not aligns:
            continue

        query_seq = query_fasta.fetch(query)
        for align in aligns:
            if not align.has_canonical_target() or align.blocks is None:
                continue

            block_matches = self.map_align(align)
            if not block_matches:
                continue
            tid = self.pick_best_mapping(block_matches, align)
            if tid is None:
                continue

            transcript = self.transcripts_dict[tid]
            olap = self.overlap(align, transcript)
            mappings[query].append((transcript.gene, transcript.id, olap))
            junc_adjs.extend(
                self.collect_junctions(align, transcript, block_matches[tid]))
            if find_events:
                events.extend(find_novel_junctions(
                    block_matches[tid],
                    align,
                    transcript,
                    query_seq,
                    self.genome_fasta,
                    accessory_known_features=accessory_known_features,
                    max_diff=max_diff))
    return mappings, junc_adjs, events
def make_alignment(self, source_alignment_path):
    """Load the source alignment and reconcile it with the stored copy.

    The alignment read from ``source_alignment_path`` is kept on
    ``self.alignment``. A copy lives at ``source.phy`` inside
    ``self.cfg.start_tree_path`` (recorded as ``self.alignment_path``): it
    is written on first use and compared against on later runs.

    Raises:
        AnalysisError: if a stored copy exists and is not the same as the
            alignment just read.
    """
    self.alignment = Alignment()
    self.alignment.read(source_alignment_path)

    stored_path = os.path.join(self.cfg.start_tree_path, 'source.phy')
    self.alignment_path = stored_path

    if not os.path.exists(stored_path):
        # First run: keep a copy for later comparisons.
        self.alignment.write(stored_path)
        return

    previous = Alignment()
    previous.read(stored_path)
    if previous.same_as(self.alignment):
        return

    log.error(
        "Alignment file has changed since previous run. "
        "You need to use the force-restart option."
    )
    raise AnalysisError
def em_step(self, iteration):
    """Run one EM iteration over the aligned corpus.

    Reads alignments through ``Alignment.reader_pharaoh`` from the files
    named by ``self.ffilename``, ``self.efilename`` and ``self.afilename``,
    extracts a hypergraph for each one, accumulates expected counts and the
    inside score of each hypergraph root, writes one Viterbi tree file per
    successfully extracted alignment into a fresh ``iter_NNN`` directory
    under ``self.outputdir``, and finally normalizes ``self.counter``.

    Args:
        iteration: zero-based iteration index; the output directory and the
            log line use ``iteration + 1``.

    Raises:
        OSError: from ``os.mkdir`` if the iteration directory already exists.
    """
    likelihood = 0
    # The reader is consumed by the loop below, so the files must stay open
    # for the whole loop; the context manager closes them afterwards even if
    # extraction raises (the original never closed them).
    with open(self.ffilename) as ffile, \
            open(self.efilename) as efile, \
            open(self.afilename) as afile:
        alignments = Alignment.reader_pharaoh(ffile, efile, afile)
        percent_counter = PercentCounter(total=self.corpus_size)
        dirname = os.path.join(self.outputdir,
                               'iter_%s' % str(iteration + 1).rjust(3, '0'))
        os.mkdir(dirname)
        if logger.level >= 1:
            logger.writeln('\niteration %s' % (iteration + 1))
        for i, alignment in enumerate(alignments):
            percent_counter.print_percent(i)
            extractor = Extractor(
                lexical_weighter=self.lexical_weighter,
                maximize_derivation=self.maximize_derivation)
            hg = extractor.extract_hypergraph(alignment)
            if hg is None:
                # Nothing could be extracted for this alignment; skip it.
                continue
            # compute expected counts
            self.compute_expected_counts(hg)
            likelihood += hg.root.inside
            # Tree files are numbered from 1 although ``i`` starts at 0.
            treefilename = os.path.join(dirname,
                                        'tree_%s' % str(i + 1).rjust(8, '0'))
            self.write_viterbi_tree(hg, treefilename)
    if logger.level >= 1:
        logger.writeln('likelihood: %s' % likelihood)
        logger.writeln('normalizing...')
    self.counter.normalize_vbdp(self.alpha, self.threshold)
    if logger.level >= 1:
        logger.writeln('prob table size: %s' % len(self.counter.prob))
def extra_aligner(fq1_files, smp_name, args, fq2_files=None):
    """Align reads into the project's "extra" mapping directory.

    The pipeline argument dict is updated in place with the inputs and the
    output location, ``small_genome`` is forced to ``True`` for the run, and
    ``Alignment(**args).run()`` is executed.

    The original notes list the index check order as: rRNA, genome,
    spike-in-rRNA, spike-in (presumably performed inside ``Alignment``;
    not visible here).

    Args:
        fq1_files: read-1 input, stored as ``args['fq1']``.
        smp_name: sample name, stored as ``args['smp_name']``.
        args: dict of pipeline arguments; must contain ``'path_out'`` and
            ``'small_genome'``. It is mutated: ``fq1``, ``fq2``,
            ``path_out``, ``smp_name`` and ``align_to_te`` keep their new
            values after the call; only ``small_genome`` is restored.
        fq2_files: optional read-2 input, stored as ``args['fq2']``.

    Returns:
        Whatever ``Alignment(**args).run()`` returns.
    """
    project_path = init_rnaseq_project(args['path_out'], analysis_type=1)
    extra_align_path = project_path['extra']

    # QC reporting for this step is disabled, so no report path is built.

    ## update args
    args['fq1'] = fq1_files
    args['fq2'] = fq2_files
    args['path_out'] = extra_align_path['mapping']
    args['smp_name'] = smp_name
    args['align_to_te'] = False

    # extra small genome, for STAR
    small_genome = args['small_genome']
    args['small_genome'] = True
    try:
        ## run alignment
        return Alignment(**args).run()
    finally:
        # Restore the caller's setting even when the aligner raises;
        # previously a failure left ``small_genome`` stuck at True.
        args['small_genome'] = small_genome
# Script section: export alignment data by PK for every domain of one
# hard-coded project. ``logger`` is defined outside this section.

# Rotating log file: DEBUG level, files of up to 5,000,000 bytes, 5 backups.
fh = logging.handlers.RotatingFileHandler('export_pk.log',maxBytes=5000000, backupCount=5)
fh.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
fh.setFormatter(formatter)
logger.addHandler(fh)
# reading config file
sCFGName = 'smt.cfg'
smtConfig = ConfigParser.RawConfigParser()
smtConfig.read(sCFGName)
# setup DB parameters from the [MONGODB] section
# NOTE(review): ``host`` is read here but MongoClient() below is created
# without arguments, so the configured host is not used -- confirm intent.
host = smtConfig.get('MONGODB','host')
database = smtConfig.get('MONGODB','database')
source_database = smtConfig.get('MONGODB','source_database')
username = smtConfig.get('MONGODB','username')
password = smtConfig.get('MONGODB','password')
# connect to MongoDB
client = MongoClient()
db = client[database]
# DB authentication (the return value is not checked)
db.authenticate(username,password,source=source_database)
# Search for project_code = "MFW001_0-010 Metro Paris-Ligne 15_T2A"
# NOTE(review): find_one's result is passed to Project() unchecked; no
# handling is visible here for the case where no project matches.
pd = db.Project.find_one({"project_code":"MFW001_0-010 Metro Paris-Ligne 15_T2A"})
p = Project(db, pd)
p.load()
found_domains = Domain.find(db, {"project_id": p._id})
for dom in found_domains:
    d = Domain(db,dom)
    d.load()
    # Export Alignments: arguments are db, domain_id, query CSV path and
    # output CSV path. Every domain writes to the same output path.
    bDone = Alignment.export_data_by_pk(db,d._id,"../data/query_pk.csv","../data/out_pk.csv")
    print bDone
def read_alignments(self, input_dir, file_format, header_grep=None, compression=None):
    """Load every alignment file found in ``input_dir``.

    Files are selected by extension: ``fa``/``fas``/``fasta`` for
    ``file_format == 'fasta'`` and ``phy`` for ``'phylip'``; any other
    format selects nothing. With ``compression`` set, that suffix is
    appended to each extension and each file is decompressed into a
    temporary file before loading. The sorted file list is stored in
    ``self._input_files``.

    Args:
        input_dir: directory searched for alignment files.
        file_format: ``'fasta'`` or ``'phylip'``.
        header_grep: optional callable applied to every sequence header;
            the record is rebuilt from the rewritten headers.
        compression: ``None``, ``'gz'`` or ``'bz2'``.

    Returns:
        List of ``Alignment`` records, each named after its file with the
        extensions stripped.

    Raises:
        TypeError: if ``header_grep`` cannot be applied to a header.
        RuntimeError: re-raised when rebuilding a record fails.
    """
    optioncheck(compression, [None, 'gz', 'bz2'])

    if file_format == 'fasta':
        extensions = ['fa', 'fas', 'fasta']
    elif file_format == 'phylip':
        extensions = ['phy']
    else:
        extensions = []
    if compression:
        extensions = ['.'.join([ext, compression]) for ext in extensions]

    files = fileIO.glob_by_extensions(input_dir, extensions)
    files.sort(key=SORT_KEY)
    self._input_files = files

    def _load(path):
        # Try with the third Alignment argument set to True first; a
        # RuntimeError triggers one retry with it set to False.
        try:
            return Alignment(path, file_format, True)
        except RuntimeError:
            return Alignment(path, file_format, False)

    records = []
    pbar = setup_progressbar("Loading files", len(files), simple_progress=True)
    pbar.start()

    for index, path in enumerate(files):
        if compression is None:
            record = _load(path)
        else:
            with fileIO.TempFile() as tmpfile:
                # Close reader and writer before loading so the temporary
                # file is complete, but load while it still exists.
                with fileIO.freader(path, compression) as reader, fileIO.fwriter(tmpfile) as writer:
                    for line in reader:
                        writer.write(line)
                record = _load(tmpfile)

        if header_grep:
            try:
                datatype = 'dna' if record.is_dna() else 'protein'
                renamed = [(header_grep(header), seq)
                           for (header, seq) in record.get_sequences()]
                record = Alignment(renamed, datatype)
            except TypeError:
                raise TypeError("Couldn't apply header_grep to header\n"
                                "alignment number={}, name={}\n"
                                "header_grep={}".format(index,
                                                        fileIO.strip_extensions(path),
                                                        header_grep))
            except RuntimeError:
                print('RuntimeError occurred processing alignment number={}, name={}'
                      .format(index, fileIO.strip_extensions(path)))
                raise

        record.name = fileIO.strip_extensions(path)
        records.append(record)
        pbar.update(index)

    pbar.finish()
    return records
class Analysis(object):
    """Performs the analysis and collects the results.

    Construction validates the configuration, prepares the output folders,
    loads the source alignment and builds the starting tree. ``analyse``
    relies on a ``do_analysis`` method that is not defined in this class.
    """

    def __init__(self, cfg, rpt, force_restart=False, save_phyml=False, threads=-1):
        """Prepare the working directories, the alignment and the tree.

        Args:
            cfg: configuration object; ``cfg.validate()`` is called first.
            rpt: reporter used to write subset/scheme summaries.
            force_restart: delete the whole output folder when true;
                otherwise only the schemes folder is removed.
            save_phyml: keep the phyml output files after parsing when true.
            threads: ``1`` runs models one after another; any other value
                uses the thread pool.
        """
        cfg.validate()
        self.cfg = cfg
        self.rpt = rpt
        self.threads = threads
        self.save_phyml = save_phyml
        self.results = results.AnalysisResults()

        log.info("Beginning Analysis")
        if force_restart:
            # Remove everything
            if os.path.exists(self.cfg.output_path):
                log.warning("Deleting all previous workings in '%s'",
                            self.cfg.output_path)
                shutil.rmtree(self.cfg.output_path)
        else:
            # Just remove the schemes folder
            if os.path.exists(self.cfg.schemes_path):
                log.info("Removing Schemes in '%s' (they will be "
                         "recalculated from existing subset data)",
                         self.cfg.schemes_path)
                shutil.rmtree(self.cfg.schemes_path)

        # check for old analyses to see if we can use the old data
        self.cfg.check_for_old_config()

        # Make some folders for the analysis
        self.cfg.make_output_folders()
        self.make_alignment(cfg.alignment_path)
        self.make_tree(cfg.user_tree_topology_path)

        # Counters used only for progress messages
        self.subsets_analysed_set = set()
        self.subsets_analysed = 0
        self.total_subset_num = None
        self.schemes_analysed = 0
        self.total_scheme_num = None

    def analyse(self):
        """Run the analysis, finalise and report the results, return them."""
        self.do_analysis()
        self.results.finalise()
        self.report()
        return self.results

    def report(self):
        """Write the best schemes (AIC, AICc, BIC) and all scheme results."""
        best = [
            ("Best scheme according to AIC", self.results.best_aic),
            ("Best scheme according to AICc", self.results.best_aicc),
            ("Best scheme according to BIC", self.results.best_bic),
        ]
        self.rpt.write_best_schemes(best)
        self.rpt.write_all_schemes(self.results)

    def make_alignment(self, source_alignment_path):
        """Load the source alignment and check it against the stored copy.

        Raises:
            AnalysisError: if a stored ``source.phy`` exists and differs.
        """
        # Make the alignment
        self.alignment = Alignment()
        self.alignment.read(source_alignment_path)

        # We start by copying the alignment
        self.alignment_path = os.path.join(self.cfg.start_tree_path, 'source.phy')
        if os.path.exists(self.alignment_path):
            # Make sure it is the same
            old_align = Alignment()
            old_align.read(self.alignment_path)
            if not old_align.same_as(self.alignment):
                log.error("Alignment file has changed since previous run. "
                          "You need to use the force-restart option.")
                raise AnalysisError
        else:
            self.alignment.write(self.alignment_path)

    def make_tree(self, user_path):
        """Write the filtered alignment and obtain a starting tree.

        Args:
            user_path: path of a user-supplied topology, or ``None``/``""``
                to have a topology created from the filtered alignment.
        """
        # Begin by making a filtered alignment, containing ONLY those columns
        # that are defined in the subsets
        subset_with_everything = subset.Subset(*list(self.cfg.partitions))
        self.filtered_alignment = SubsetAlignment(self.alignment,
                                                  subset_with_everything)
        self.filtered_alignment_path = os.path.join(self.cfg.start_tree_path,
                                                    'filtered_source.phy')
        self.filtered_alignment.write(self.filtered_alignment_path)

        # Now we've written this alignment, we need to lock everything in
        # place, no more adding partitions, or changing them from now on.
        self.cfg.partitions.check_against_alignment(self.alignment)
        self.cfg.partitions.finalise()

        # We start by copying the alignment
        self.alignment_path = os.path.join(self.cfg.start_tree_path, 'source.phy')

        # Now check for the tree
        tree_path = phyml.make_tree_path(self.filtered_alignment_path)
        if not os.path.exists(tree_path):
            # If we have a user tree, then use that, otherwise, create a topology
            if user_path is not None and user_path != "":
                # Copy it into the start tree folder
                log.info("Using user supplied topology at %s", user_path)
                topology_path = os.path.join(self.cfg.start_tree_path,
                                             'user_topology.phy')
                phyml.dupfile(user_path, topology_path)
            else:
                topology_path = phyml.make_topology(self.filtered_alignment_path,
                                                    self.cfg.datatype)

            # Now estimate branch lengths. For any other datatype the
            # default tree path is kept unchanged.
            if self.cfg.datatype == "DNA":
                tree_path = phyml.make_branch_lengths(
                    self.filtered_alignment_path, topology_path)
            elif self.cfg.datatype == "protein":
                tree_path = phyml.make_branch_lengths_protein(
                    self.filtered_alignment_path, topology_path)

        self.tree_path = tree_path
        log.info("Starting tree with branch lengths is here: %s", self.tree_path)

    def analyse_subset(self, sub, models):
        """Analyse the subset using the models given.

        This is the core place where everything comes together.
        The results are placed into subset.result.

        Cached results are used where available; only the missing models
        are run, and model selection is performed before returning.

        Raises:
            AnalysisError: if the stored subset alignment no longer matches,
                or if some models still have no results after running.
        """
        log.debug("About to analyse %s using models %s", sub, ", ".join(models))

        # keep people informed about what's going on
        # if we don't know the total subset number, we can usually get it like this
        if self.total_subset_num is None:
            self.total_subset_num = len(sub._cache)

        old_num_analysed = self.subsets_analysed
        self.subsets_analysed_set.add(sub.name)
        self.subsets_analysed = len(self.subsets_analysed_set)
        if self.subsets_analysed > old_num_analysed:
            # we've just analysed a subset we haven't seen yet
            percent_done = (float(self.subsets_analysed) * 100.0
                            / float(self.total_subset_num))
            log.info("Analysing subset %d/%d: %.2f%% done",
                     self.subsets_analysed, self.total_subset_num, percent_done)

        subset_cache_path = os.path.join(self.cfg.subsets_path, sub.name + '.bin')
        # We might have already saved a bunch of results, try there first
        if not sub.results:
            log.debug("Reading in cached data from the subsets file")
            sub.read_cache(subset_cache_path)

        # First, see if we've already got the results loaded. Then we can
        # shortcut all the other checks
        models_done = set(sub.results.keys())
        log.debug("These models have already been done: %s", models_done)
        models_required = set(models)
        models_to_do = models_required - models_done
        log.debug("Which leaves these models still to analyse: %s", models_to_do)

        # Empty set means we're done
        if not models_to_do:
            log.debug("All models already done, so using just the cached "
                      "results for subset %s", sub)
            sub.model_selection(self.cfg.model_selection, self.cfg.models)
            return

        # Make an Alignment from the source, using this subset
        sub_alignment = SubsetAlignment(self.alignment, sub)
        sub_path = os.path.join(self.cfg.phyml_path, sub.name + '.phy')
        # Add it into the sub, so we keep it around
        sub.alignment_path = sub_path

        # Maybe it is there already?
        if os.path.exists(sub_path):
            log.debug("Found existing alignment file %s", sub_path)
            old_align = Alignment()
            old_align.read(sub_path)

            # It had better be the same!
            if not old_align.same_as(sub_alignment):
                # Each fragment ends with a space so the joined message reads
                # correctly (it used to contain "thedata_blocks" and
                # "analysis.You'll").
                log.error(
                    "It looks like you have changed one or more of the "
                    "data_blocks in the configuration file, "
                    "so the new subset alignments "
                    "don't match the ones stored for this analysis. "
                    "You'll need to run the program with --force-restart")
                raise AnalysisError
        else:
            # We need to write it
            sub_alignment.write(sub_path)

        # Try and read in some previous analyses; parse_results removes every
        # model it manages to load from models_to_do.
        log.debug("Checking for old results in the phyml folder")
        self.parse_results(sub, models_to_do)
        if not models_to_do:
            sub.model_selection(self.cfg.model_selection, self.cfg.models)
            return

        # What is left, we actually have to analyse...
        # For efficiency, rank the models by difficulty, most difficult
        # first; ties fall back to the model itself, as with the original
        # sort of (difficulty, model) pairs. sorted() also works on Python 3,
        # where zip() has no .sort().
        sorted_models_to_do = tuple(sorted(
            models_to_do,
            key=lambda m: (get_model_difficulty(m), m),
            reverse=True))
        log.debug("About to analyse these models, in this order: %s",
                  sorted_models_to_do)

        tasks = [
            (phyml.analyse,
             (m, sub_path, self.tree_path, self.cfg.branchlengths))
            for m in sorted_models_to_do
        ]

        if self.threads == 1:
            self.run_models_concurrent(tasks)
        else:
            self.run_models_threaded(tasks)

        # Now parse the models we've just done
        self.parse_results(sub, models_to_do)

        # This should be empty NOW!
        if models_to_do:
            log.error("Failed to run models %s; not sure why",
                      ", ".join(list(models_to_do)))
            raise AnalysisError

        # Now we have analysed all models for this subset, we do model selection
        # but ONLY on the models specified in the cfg file.
        sub.model_selection(self.cfg.model_selection, self.cfg.models)

        # If we made it to here, we should write out the new summary
        self.rpt.write_subset_summary(sub)
        # We also need to update this
        sub.write_cache(subset_cache_path)

    def parse_results(self, sub, models_to_do):
        """Read in the results and parse them.

        Every model whose stats file exists and parses is added to ``sub``
        and removed from ``models_to_do`` (mutated in place). Unless
        ``self.save_phyml`` is set, its stats and tree files are deleted.
        """
        models_done = []
        # Iterate over a copy: models_to_do is modified inside the loop.
        for m in list(models_to_do):
            stats_path, tree_path = phyml.make_output_path(sub.alignment_path, m)
            if not os.path.exists(stats_path):
                continue

            # Close the handle promptly; the file may be removed just below.
            with open(stats_path, 'rb') as stats_file:
                sub_output = stats_file.read()

            # Annotate with the parameters of the model
            try:
                result = phyml.parse(sub_output)
                sub.add_model_result(m, result)
                # Remove the current model from remaining ones
                models_to_do.remove(m)

                # Just used for below
                models_done.append(m)
                if not self.save_phyml:
                    os.remove(stats_path)
                    os.remove(tree_path)
            except phyml.PhymlError:
                log.warning("Failed loading parse output from %s. "
                            "Output maybe corrupted. I'll run it again.",
                            stats_path)

        if models_done:
            log.debug("Loaded analysis for %s, models %s", sub,
                      ", ".join(models_done))

    def run_models_concurrent(self, tasks):
        """Call each ``(func, args)`` task in order, one after another."""
        for func, args in tasks:
            func(*args)

    def run_models_threaded(self, tasks):
        """Run the tasks on a thread pool of ``self.threads`` and wait."""
        pool = threadpool.Pool(tasks, self.threads)
        pool.join()

    def analyse_scheme(self, sch, models):
        """Analyse every subset of ``sch`` and record the scheme result.

        Returns:
            The ``scheme.SchemeResult`` added to ``self.results``.
        """
        self.schemes_analysed = self.schemes_analysed + 1
        log.info("Analysing scheme %d/%d" % (self.schemes_analysed,
                                             self.total_scheme_num))
        for sub in sch:
            self.analyse_subset(sub, models)

        # AIC needs the number of sequences
        number_of_seq = len(self.alignment.species)
        result = scheme.SchemeResult(sch, number_of_seq, self.cfg.branchlengths)
        self.results.add_scheme_result(result)

        # TODO: should put all paths into config. Then reporter should decide
        # whether to create stuff
        fname = os.path.join(self.cfg.schemes_path, sch.name + '.txt')
        # Close (and flush) the summary file as soon as it is written.
        with open(fname, 'w') as summary_file:
            self.rpt.write_scheme_summary(result, summary_file)

        return result
# Script section: print the per-strata aggregation of every domain of one
# hard-coded project. ``fh`` and ``logger`` are defined outside this section.
formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
fh.setFormatter(formatter)
logger.addHandler(fh)
# reading config file
sCFGName = 'smt.cfg'
smtConfig = ConfigParser.RawConfigParser()
smtConfig.read(sCFGName)
# setup DB parameters from the [MONGODB] section
# NOTE(review): ``host`` is read here but MongoClient() below is created
# without arguments, so the configured host is not used -- confirm intent.
host = smtConfig.get('MONGODB','host')
database = smtConfig.get('MONGODB','database')
source_database = smtConfig.get('MONGODB','source_database')
username = smtConfig.get('MONGODB','username')
password = smtConfig.get('MONGODB','password')
# connect to MongoDB
client = MongoClient()
db = client[database]
# DB authentication (the return value is not checked)
db.authenticate(username,password,source=source_database)
# Search for project_code = "MFW001_0-010 Metro Paris-Ligne 15_T2A"
# NOTE(review): find_one's result is passed to Project() unchecked; no
# handling is visible here for the case where no project matches.
pd = db.Project.find_one({"project_code":"MFW001_0-010 Metro Paris-Ligne 15_T2A"})
p = Project(db, pd)
p.load()
found_domains = Domain.find(db, {"project_id": p._id})
for dom in found_domains:
    d = Domain(db,dom)
    d.load()
    # Example of aggregation: print every item returned for this domain
    aggrList = Alignment.aggregate_by_strata(db, d._id)
    for ii in aggrList:
        print ii
def pre_process(optmap_i, optmap_file, myfile, myfile2, output_dir, min_confidence):
    """Load, filter and annotate the alignments of one optical map.

    Steps, each followed by a log file written through ``output_alms``:
      0. read ``<myfile>_flip.xmap`` and keep, per (ref, qry) pair, the
         alignment with the highest confidence;
      1. mark alignments with confidence below ``min_confidence`` removed;
      2. rescale coordinates (the factor is currently forced to 1.0) and
         derive overhang lengths and projected coordinates;
      3. record candidate cutting sites in ``chimeric_pairs_<i>.log`` and
         mark alignments with overhangs on both sides as removed;
      4. for pairs overlapping by at least 20000 on the same ref, mark the
         less confident one as removed.

    This is Python 2 code (print statements, ``.next()``, ``file()``).

    Args:
        optmap_i: identifier used only in the names of the output files.
        optmap_file: path of the tab-separated optical map file.
        myfile: path prefix for ``_flip.xmap`` and ``_r.cmap``.
        myfile2: path prefix for ``_key.txt``.
        output_dir: directory receiving the log files.
        min_confidence: confidence threshold used in steps 1 and 3.

    Returns:
        Tuple ``(current_alms, optmap, chimeric_pairs)``: the value returned
        by ``copy_alms`` after step 4, a dict from CMapId to the sorted
        label positions, and a list of
        ``(ref, ref_pos, qry, qry_pos)`` tuples.
    """
    header_lines = 10
    header = []
    # Minimum overhang lengths for a candidate cutting site (step 3).
    minrefoverhang = 50000
    minqryoverhang = 50000
    # all_alms[ref] is the list of every Alignment read for that ref.
    all_alms = {
    }
    # qualify_alms[ref][qry]: only one alignment (the one with the highest
    # confidence) is kept for each qry within one ref.
    qualify_alms = {
    }
    # removed[ref, qry] == True means the alignment for (ref, qry) is removed.
    removed = {
    }
    # collect the alignments and store them in all_alms
    print '---------------read .xmap file-------------------'
    with open(myfile + '_flip.xmap', 'rb') as csvfile:
        csvreader = csv.reader(csvfile, delimiter='\t')
        for i in range(header_lines):  # 10 lines of header
            header.append(csvreader.next())  # saved, but not used again below
        # read the data rows until the reader is exhausted
        while True:
            try:
                row = csvreader.next()
                # Column 0 is skipped; columns 1-13 are passed positionally.
                x = Alignment(int(row[1]), int(row[2]), float(row[3]),
                              float(row[4]), float(row[5]), float(row[6]),
                              row[7], float(row[8]), row[9], float(row[10]),
                              float(row[11]), int(row[12]), row[13])
                if x.ref not in all_alms:
                    all_alms[x.ref] = [x]
                else:
                    all_alms[x.ref].append(x)
            except StopIteration:
                break
    num_all_alms = 0
    for ref in all_alms:
        num_all_alms += len(all_alms[ref])
    print "In total, the number of alignments collected is ", num_all_alms
    # only keep one alignment (the one with the highest confidence) for each
    # qry within one ref; on equal confidence the first one seen is kept
    for ref in all_alms:
        group = all_alms[ref]
        qry_bestx = {}
        for x in group:
            if x.qry not in qry_bestx:
                qry_bestx[x.qry] = x
            else:
                if x.confidence > qry_bestx[x.qry].confidence:
                    qry_bestx[x.qry] = x
        qualify_alms[ref] = {}
        for qry in qry_bestx:
            qualify_alms[ref][qry] = qry_bestx[qry]
    num_qualify_alms = 0
    for ref in qualify_alms:
        num_qualify_alms += len(qualify_alms[ref])
    # initialize the removed map: nothing is removed yet
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            removed[ref, qry] = False
    current_alms = copy_alms(qualify_alms, removed)
    output_alms(current_alms, output_dir + "/opt_" + str(optmap_i) + "_alms_0_initial.log")
    print "In total, the number of alignments in qualify_alms is ", num_qualify_alms
    # remove low confidence alignments (strictly below the threshold)
    print '---------------Remove low quality alignments---------------'
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            x = qualify_alms[ref][qry]
            if x.confidence < min_confidence:
                removed[ref, qry] = True
                print 'alignment (', ref, ',', qry, ') is low quality and removed'
    num_alms = 0
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            if removed[ref, qry] == False:
                num_alms += 1
    current_alms = copy_alms(qualify_alms, removed)
    output_alms(
        current_alms,
        output_dir + "/opt_" + str(optmap_i) + "_alms_1_removed_low_conf.log")
    print "After removing low confidence alignments, the number of alignments is ", num_alms
    print '---------------End---------------'
    # read optical map: keep the positions of channel "1" labels per CMapId
    optmap = {}
    with open(optmap_file) as f_map:
        for line in f_map:
            line = line.strip()
            # NOTE(review): a blank line makes line[0] raise IndexError.
            if line[0] == '#':
                continue
            cols = line.split('\t')
            CMapId = int(cols[0])
            LabelChannel = cols[4]
            Position = float(cols[5])
            if CMapId not in optmap:
                optmap[CMapId] = []
            if LabelChannel == "1":
                optmap[CMapId].append(Position)
    for CMapId in optmap:
        optmap[CMapId].sort()
    print '---------------scaling-------------------'
    # calculating scaling: sequence length of every qry id from the key file
    qry_len = {}
    with open(myfile2 + '_key.txt') as f_key:
        for i in range(0, 4):  # 4 header lines
            f_key.readline()
        for line in f_key:
            line = line.strip()
            cols = line.split('\t')
            qry_id = int(cols[0])
            seq_len = int(cols[2])
            qry_len[qry_id] = seq_len
    scaling = 0
    num = 0
    with open(myfile + '_r.cmap') as f_q:
        for i in range(0, 11):  # 11 header lines
            f_q.readline()
        for line in f_q:
            line = line.strip()
            cols = line.split('\t')
            qry_id = int(cols[0])
            appr_len = float(cols[1])
            seq_len = qry_len[qry_id]
            # accumulated once per data line of the .cmap file
            scaling += appr_len / seq_len
            num += 1
    # mean ratio; raises ZeroDivisionError if the file had no data lines
    scaling /= num
    # scaling=1.02258059775
    # NOTE(review): the mean computed above is discarded here, so every
    # division by ``scaling`` below is a division by 1.0 -- confirm intent.
    scaling = 1.0
    # use scaling to adjust the coordinates of the alignments
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            x = qualify_alms[ref][qry]
            x.qrystartpos /= scaling
            x.qryendpos /= scaling
            x.qrylen /= scaling
            x.refstartpos /= scaling
            x.refendpos /= scaling
            x.reflen /= scaling
    # use scaling to adjust the coordinates of the optical map
    for ref in optmap:
        for i in range(0, len(optmap[ref])):
            optmap[ref][i] /= scaling
    print '---------------END-------------------'
    # find the reference-based coordinates for each qry
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            x = qualify_alms[ref][qry]
            # unaligned qry length on each side, depending on orientation
            if (x.orientation == '+'):
                x.qry_left_overlen = x.qrystartpos
                x.qry_right_overlen = x.qrylen - x.qryendpos
            else:
                x.qry_left_overlen = x.qrylen - x.qrystartpos
                x.qry_right_overlen = x.qryendpos
            # extent of the whole qry in ref coordinates
            x.start = x.refstartpos - x.qry_left_overlen
            x.end = x.refendpos + x.qry_right_overlen
            # unaligned ref length on each side
            x.ref_left_overlen = x.refstartpos
            x.ref_right_overlen = x.reflen - x.refendpos
            # extent of the whole ref in qry coordinates
            if (x.orientation == '+'):
                x.refstart = x.qrystartpos - x.ref_left_overlen
                x.refend = x.qryendpos + x.ref_right_overlen
            else:
                x.refstart = x.qryendpos - x.ref_right_overlen
                x.refend = x.qrystartpos + x.ref_left_overlen
    num_alms = 0
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            if removed[ref, qry] == False:
                num_alms += 1
    current_alms = copy_alms(qualify_alms, removed)
    output_alms(current_alms, output_dir + "/opt_" + str(optmap_i) + "_alms_2_scaled.log")
    print "After scaling, the number of alignments is ", num_alms
    # read qry map: label positions per CMapId, skipping channel "0"
    qry_markers = {}
    with open(myfile + '_r.cmap') as f_q:
        for i in range(11):  # 11 header lines
            header_line = f_q.readline()
        for line in f_q:
            line = line.strip()
            cols = line.split('\t')
            CMapId = int(cols[0])
            # ContigLength, NumSites and SiteID are parsed but not used.
            ContigLength = float(cols[1])
            NumSites = int(cols[2])
            SiteID = int(cols[3])
            LabelChannel = cols[4]
            Position = float(cols[5])
            if LabelChannel == "0":
                continue
            if CMapId not in qry_markers:
                qry_markers[CMapId] = []
            Position /= scaling
            qry_markers[CMapId].append(Position)
    for CMapId in qry_markers:
        qry_markers[CMapId].sort()
    # redundant: the with-block above has already closed the file
    f_q.close()
    print '---------------candidate cutting sites-------------------'
    # NOTE(review): fpair is not closed if an exception occurs before
    # fpair.close() below.
    fpair = file(output_dir + "/chimeric_pairs_" + str(optmap_i) + ".log", 'w')
    fpair.write("ref_id\tref_pos\tqry_id\tqry_pos\n")
    chimeric_pairs = []
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            if removed[ref, qry] == True:
                continue
            x = qualify_alms[ref][qry]
            # Strict comparison: an alignment whose confidence equals
            # min_confidence was kept in step 1 but is not examined here.
            if (x.confidence > min_confidence):
                ref_left_overlen = x.refstartpos
                ref_right_overlen = x.reflen - x.refendpos
                flag_left = False
                flag_right = False
                # left side: both overhangs long enough and at least one
                # marker reported for the qry's left overhang
                if (x.qry_left_overlen > minqryoverhang
                        and ref_left_overlen > minrefoverhang
                        and markers_in_qry_left_overhang(qry_markers, x) > 0):
                    flag_left = True
                    chimeric_pairs.append(
                        (x.ref, x.refstartpos, x.qry, x.qrystartpos))
                    print(x.ref, x.refstartpos, x.qry, x.qrystartpos), "is a pair of candidate cutting sites"
                    fpair.write(
                        str(x.ref) + "\t" + str(x.refstartpos) + "\t" +
                        str(x.qry) + "\t" + str(x.qrystartpos) + "\n")
                # right side: same test with the right overhangs
                if (x.qry_right_overlen > minqryoverhang
                        and ref_right_overlen > minrefoverhang
                        and markers_in_qry_right_overhang(qry_markers, x) > 0):
                    flag_right = True
                    chimeric_pairs.append(
                        (x.ref, x.refendpos, x.qry, x.qryendpos))
                    print(x.ref, x.refendpos, x.qry, x.qryendpos), "is a pair of candidate cutting sites"
                    fpair.write(
                        str(x.ref) + "\t" + str(x.refendpos) + "\t" +
                        str(x.qry) + "\t" + str(x.qryendpos) + "\n")
                # overhangs on both sides: drop the alignment
                if flag_left == True and flag_right == True:
                    removed[ref, qry] = True
    fpair.close()
    print '---------------END-------------------'
    num_alms = 0
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            if removed[ref, qry] == False:
                num_alms += 1
    current_alms = copy_alms(qualify_alms, removed)
    output_alms(
        current_alms,
        output_dir + "/opt_" + str(optmap_i) + "_alms_3_removed_both_overhang.log")
    print "After removing alignments with both overhangs, the number of alignments is ", num_alms
    # check overlap between alignments on the same ref
    for r in qualify_alms:
        for q1 in qualify_alms[r]:
            if removed[r, q1] == True:
                continue
            x = qualify_alms[r][q1]
            for q2 in qualify_alms[r]:
                if removed[r, q2] == True:
                    continue
                y = qualify_alms[r][q2]
                # visit each unordered pair once (q1 < q2)
                if q1 >= q2:
                    continue
                if x.refstartpos <= y.refstartpos and y.refstartpos <= x.refendpos:
                    overlap = min(x.refendpos, y.refendpos) - y.refstartpos
                elif y.refstartpos <= x.refstartpos and x.refstartpos <= y.refendpos:
                    overlap = min(x.refendpos, y.refendpos) - x.refstartpos
                else:
                    overlap = 0
                if overlap >= 20000:
                    # Drop the less confident one (q2 on a tie). The inner
                    # loop keeps comparing x even after q1 has been removed.
                    if x.confidence < y.confidence:
                        removed[r, q1] = True
                    else:
                        removed[r, q2] = True
    num_alms = 0
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            if removed[ref, qry] == False:
                num_alms += 1
    current_alms = copy_alms(qualify_alms, removed)
    output_alms(
        current_alms,
        output_dir + "/opt_" + str(optmap_i) + "_alms_4_solved_overlaps.log")
    print "After removing one of two overlap alignments, the number of alignments is ", num_alms
    return current_alms, optmap, chimeric_pairs
def analyse_subset(self, sub, models):
    """Analyse the subset using the models given.

    This is the core place where everything comes together.
    The results are placed into subset.result.

    Cached results are used where available; only the missing models are
    run, and model selection is performed before returning.

    Args:
        sub: the subset to analyse; its results, cache and alignment path
            are updated.
        models: iterable of model names to analyse.

    Raises:
        AnalysisError: if the stored subset alignment no longer matches, or
            if some models still have no results after running.
    """
    log.debug("About to analyse %s using models %s", sub, ", ".join(models))

    # keep people informed about what's going on
    # if we don't know the total subset number, we can usually get it like this
    if self.total_subset_num is None:
        self.total_subset_num = len(sub._cache)

    old_num_analysed = self.subsets_analysed
    self.subsets_analysed_set.add(sub.name)
    self.subsets_analysed = len(self.subsets_analysed_set)
    if self.subsets_analysed > old_num_analysed:
        # we've just analysed a subset we haven't seen yet
        percent_done = (float(self.subsets_analysed) * 100.0
                        / float(self.total_subset_num))
        log.info("Analysing subset %d/%d: %.2f%% done",
                 self.subsets_analysed, self.total_subset_num, percent_done)

    subset_cache_path = os.path.join(self.cfg.subsets_path, sub.name + '.bin')
    # We might have already saved a bunch of results, try there first
    if not sub.results:
        log.debug("Reading in cached data from the subsets file")
        sub.read_cache(subset_cache_path)

    # First, see if we've already got the results loaded. Then we can
    # shortcut all the other checks
    models_done = set(sub.results.keys())
    log.debug("These models have already been done: %s", models_done)
    models_required = set(models)
    models_to_do = models_required - models_done
    log.debug("Which leaves these models still to analyse: %s", models_to_do)

    # Empty set means we're done
    if not models_to_do:
        log.debug("All models already done, so using just the cached "
                  "results for subset %s", sub)
        sub.model_selection(self.cfg.model_selection, self.cfg.models)
        return

    # Make an Alignment from the source, using this subset
    sub_alignment = SubsetAlignment(self.alignment, sub)
    sub_path = os.path.join(self.cfg.phyml_path, sub.name + '.phy')
    # Add it into the sub, so we keep it around
    sub.alignment_path = sub_path

    # Maybe it is there already?
    if os.path.exists(sub_path):
        log.debug("Found existing alignment file %s", sub_path)
        old_align = Alignment()
        old_align.read(sub_path)

        # It had better be the same!
        if not old_align.same_as(sub_alignment):
            # Each fragment ends with a space so the joined message reads
            # correctly (it used to contain "thedata_blocks" and
            # "analysis.You'll").
            log.error(
                "It looks like you have changed one or more of the "
                "data_blocks in the configuration file, "
                "so the new subset alignments "
                "don't match the ones stored for this analysis. "
                "You'll need to run the program with --force-restart")
            raise AnalysisError
    else:
        # We need to write it
        sub_alignment.write(sub_path)

    # Try and read in some previous analyses; the set passed in is expected
    # to shrink as results are loaded (it is re-tested right after).
    log.debug("Checking for old results in the phyml folder")
    self.parse_results(sub, models_to_do)
    if not models_to_do:
        sub.model_selection(self.cfg.model_selection, self.cfg.models)
        return

    # What is left, we actually have to analyse...
    # For efficiency, rank the models by difficulty, most difficult first;
    # ties fall back to the model itself, as with the original sort of
    # (difficulty, model) pairs. sorted() also works on Python 3, where
    # zip() has no .sort().
    sorted_models_to_do = tuple(sorted(
        models_to_do,
        key=lambda m: (get_model_difficulty(m), m),
        reverse=True))
    log.debug("About to analyse these models, in this order: %s",
              sorted_models_to_do)

    tasks = [
        (phyml.analyse, (m, sub_path, self.tree_path, self.cfg.branchlengths))
        for m in sorted_models_to_do
    ]

    if self.threads == 1:
        self.run_models_concurrent(tasks)
    else:
        self.run_models_threaded(tasks)

    # Now parse the models we've just done
    self.parse_results(sub, models_to_do)

    # This should be empty NOW!
    if models_to_do:
        log.error("Failed to run models %s; not sure why",
                  ", ".join(list(models_to_do)))
        raise AnalysisError

    # Now we have analysed all models for this subset, we do model selection
    # but ONLY on the models specified in the cfg file.
    sub.model_selection(self.cfg.model_selection, self.cfg.models)

    # If we made it to here, we should write out the new summary
    self.rpt.write_subset_summary(sub)
    # We also need to update this
    sub.write_cache(subset_cache_path)
class Line:
    """One line of a SAM file: either a header line or an alignment record.

    Header lines (starting with ``@``) are kept verbatim as a single field.
    Alignment lines are split on whitespace and get an ``Alignment`` built
    from their position, CIGAR string and ``MD:Z:`` tag.
    """

    TYPE_HEADER = 0
    TYPE_ALIGNMENT = 1

    # Flag bits that survive strip_paired_end_info: 4, 8, 16 and 32.
    _KEPT_FLAG_BITS = 0b00111100
    _MD_PREFIX = 'MD:Z:'

    def __init__(self, line_string):
        """Parse ``line_string``.

        Raises:
            CigarUnavailableError: if the CIGAR field is ``*``.
            StopIteration: if an alignment line carries no ``MD:Z:`` field.
        """
        if line_string.startswith('@'):
            self.type = self.TYPE_HEADER
            self.fields = [line_string]
            return

        self.type = self.TYPE_ALIGNMENT
        self.fields = line_string.split()
        pos = self.fields[3]
        cigar = self.fields[5]
        if cigar == '*':
            raise CigarUnavailableError

        # First field carrying the MD tag; every occurrence of the prefix
        # text is dropped from it.
        md_field = next(field for field in self.fields
                        if field.startswith(self._MD_PREFIX))
        md = md_field.replace(self._MD_PREFIX, '')
        self.alignment = Alignment(pos, cigar, md)

    def soft_clip(self, start, stop):
        """Soft-clip the record to ``start``-``stop``; headers are untouched.

        Paired-end information is stripped, the reference name gets a
        ``:start-stop`` suffix, and position, CIGAR and MD fields are
        refreshed from the clipped alignment.
        """
        if self.type == self.TYPE_HEADER:
            return

        self.strip_paired_end_info()
        self.fields[2] = '{}:{}-{}'.format(self.fields[2], start, stop)
        self.alignment.soft_clip(start, stop)
        self.fields[3] = str(self.alignment.pos)
        self.fields[5] = self.alignment.cigar

        new_md_field = self._MD_PREFIX + self.alignment.md
        self.fields = [
            new_md_field if field.startswith(self._MD_PREFIX) else field
            for field in self.fields
        ]

    def strip_paired_end_info(self):
        """Reset the mate-related fields of an alignment record.

        fields[1]: bitwise flags according to the SAM specifications:
            1   -- template having multiple segments in sequencing
            2   -- each segment properly aligned according to the aligner
            4   -- segment unmapped
            8   -- next segment in template unmapped
            16  -- SEQ being reverse complemented
            32  -- SEQ of the next segment in the template being reverse
                   complemented
            64  -- the first segment in the template
            128 -- the last segment in the template
            ...
            Only bits 4, 8, 16 and 32 are kept.
            NOTE(review): this also clears every bit above 128 -- confirm
            that is intended.
        fields[6]: reference sequence name of the primary alignment of the
            next read in the template; set to '*' (information unavailable).
        fields[7]: 1-based position of the primary alignment of the next
            read in the template; set to '0' (information unavailable).
        fields[8]: signed observed template length; set to '0'.
        """
        kept = int(self.fields[1]) & self._KEPT_FLAG_BITS
        self.fields[1] = str(kept)
        self.fields[6:9] = ['*', '0', '0']

    def __repr__(self):
        """Return the fields joined by tabs."""
        return '\t'.join(self.fields)
class Person(object):
    """A game character: attributes, needs, skills, features and relations.

    Attribute access is customised:
      * ``__getattribute__`` consults ``self.genus`` first for ordinary names;
      * ``__getattr__`` serves the five base attributes (with modifiers,
        clamped to 1..5) and needs by name;
      * ``__setattr__`` mirrors writes to base attributes into
        ``self.attributes``.
    """

    def __init__(self, age=None, gender=None, genus='human'):
        """Set default stats, attach a genus, add age/gender features,
        choose an avatar and register the person in ``persons_list``."""
        self.player_controlled = False
        self._event_type = 'person'
        self.firstname = u"Антон"
        self.surname = u"Сычов"
        self.nickname = u"Сычуля"
        self.alignment = Alignment()
        # gets Feature() objects and their child's. Add new Feature only
        # with self.add_feature()
        self.features = []
        self.tokens = []  # Special resources to activate various events
        # obedience, dependency and respect stats
        self.relations_tendency = {'convention': 0, 'conquest': 0, 'contribution': 0}
        self._stance = []
        self.avatar_path = ''
        self.master = None  # If this person is a slave, the master will be set
        self.supervisor = None
        self.slaves = []
        self.subordinates = []
        self.ap = 1
        self.schedule = Schedule(self)
        self.modifiers = Modifiers()
        # init starting features
        self.availabe_actions = []  # used if we are playing slave-part
        self.allowance = 0  # Sparks spend each turn on a lifestyle
        self.ration = {
            "amount": 'unlimited',  # 'unlimited', 'limited' by price, 'regime' for figure, 'starvation' no food
            "food_type": "cousine",  # 'forage', 'sperm', 'dry', 'canned', 'cousine'
            "target": 0,  # figures range -2:2
            "limit": 0,  # maximum resources spend to feed character each turn
            "overfeed": 0,
        }
        self.accommodation = 'makeshift'
        self.skills = []
        self.specialized_skill = None
        self.focused_skill = None
        self.skills_used = []
        self.factors = []
        self.restrictions = []
        self._needs = init_needs(self)
        self.attributes = {
            'physique': 3,
            'mind': 3,
            'spirit': 3,
            'agility': 3,
            'sensitivity': 3
        }
        self.university = {'name': 'study', 'effort': 'bad', 'auto': False}
        self.mood = 0
        self.fatigue = 0
        self._vitality = 0
        self.appetite = 0
        self.calorie_storage = 0
        self.money = 0
        self._determination = 0
        self._anxiety = 0
        self.rewards = []
        self.used_rewards = []
        self.merit = 0  # player only var for storing work result
        # Other persons known and relations with them,
        # value[1] = [needed points, current points]
        self._relations = []
        self.selfesteem = 0
        self.conditions = []
        self.genus = init_genus(self, genus)
        self.add_feature(age)
        self.add_feature(gender)
        self.set_avatar()
        persons_list.append(self)

    def set_avatar(self):
        """Pick a random avatar matching head type, gender and age.

        Skin colour found in the avatar path is added as a feature, hair
        colour is stored in ``self.hair_color``.  Falls back to
        'images/avatar/none.jpg' when no avatar matches.
        """
        path = 'images/avatar/'
        path += self.genus.head_type + '/'
        if self.gender != None:
            if self.gender == 'sexless':
                gender = 'male'
            elif self.gender == 'shemale':
                gender = 'female'
            else:
                gender = self.gender
            path += gender + '/'
        if self.age != None:
            path += self.age + '/'
        this_avas = [ava for ava in get_avatars() if ava.startswith(path)]
        try:
            # choice() raises IndexError on an empty list
            avatar = choice(this_avas)
            avatar_split = avatar.split('/')
            for str_ in avatar_split:
                if 'skin' in str_:
                    skin_color = str_.split('_')[0]
                    self.add_feature(skin_color)
                if 'hair' in str_:
                    hair_color = str_.split('_')[0]
                    self.hair_color = hair_color
            self.avatar_path = avatar
        except IndexError:
            self.avatar_path = 'images/avatar/none.jpg'

    def randomise(self, gender='female', age='adolescent'):
        """Add the given gender/age features and roll random alignment,
        skills and features."""
        self.add_feature(gender)
        self.add_feature(age)
        self.random_alignment()
        self.random_skills()
        self.random_features()
        return

    def random_alignment(self):
        """Roll each alignment axis: 20% low, 20% high, 60% middle value."""
        # roll activity
        roll = randint(1, 100)
        if roll <= 20:
            self.alignment.activity = "timid"
        elif roll > 80:
            self.alignment.activity = "ardent"
        else:
            self.alignment.activity = "reasonable"
        # roll orderliness
        roll = randint(1, 100)
        if roll <= 20:
            self.alignment.orderliness = "chaotic"
        elif roll > 80:
            self.alignment.orderliness = "lawful"
        else:
            self.alignment.orderliness = "conformal"
        # roll morality
        roll = randint(1, 100)
        if roll <= 20:
            self.alignment.morality = "evil"
        elif roll > 80:
            self.alignment.morality = "good"
        else:
            self.alignment.morality = "selfish"
        return

    def random_skills(self, pro_skill=None, talent_skill=None):
        """Assign a talent and a profession.

        Uses ``talent_skill`` / ``pro_skill`` when given, otherwise picks
        randomly from the skill tree (the pick may be None, meaning no
        talent / no profession).
        """
        skilltree = ('coding', 'sport', 'conversation', 'sex', None)
        if talent_skill:
            self.skill(talent_skill).talent = True
        else:
            roll = choice(skilltree)
            if roll:
                self.skill(roll).talent = True
        if pro_skill:
            # Use the argument, not the literal string 'pro_skill'.
            self.skill(pro_skill).profession()
        else:
            roll = choice(skilltree)
            if roll:
                self.skill(roll).profession()
        return

    def random_features(self):
        """Randomly add constitution, soul and need-related features."""
        # constitution
        const = choice(('athletic', 'brawny', 'large', 'small', 'lean', 'crooked', 'clumsy'))
        roll = randint(1, 100)
        if roll > 40:
            self.add_feature(const)
        # soul
        soul = choice(('brave', 'shy', 'smart', 'dumb', 'sensitive', 'cool', None))
        if soul:
            self.add_feature(soul)
        # needs
        needstree = {'prosperity_feat': ('greedy', 'generous'),
                     'nutrition_feat': ('gourmet', 'moderate_eater'),
                     'wellness_feat': ('low_pain_threshold', 'high_pain_threshold'),
                     'comfort_feat': ('sybarite', 'ascetic'),
                     'activity_feat': ('energetic', 'lazy'),
                     'communication_feat': ('extrovert', 'introvert'),
                     'amusement_feat': ('curious', 'dull'),
                     'authority_feat': ('dominant', 'submissive'),
                     'ambition_feat': ('ambitious', 'modest'),
                     'eros_feat': ('lewd', 'frigid'),
                     }
        for need in needstree:
            roll = randint(1, 100)
            if roll <= 20:
                self.add_feature(needstree[need][0])
            elif roll > 80:
                self.add_feature(needstree[need][1])
        return

    def change_genus(self, genus):
        """Replace the genus with a freshly initialised one."""
        self.genus = init_genus(self, genus)

    @property
    def known_characters(self):
        """All other persons taking part in any of this person's relations."""
        l = []
        for r in self._relations:
            persons = [p for p in r.persons if p != self]
            l += persons
        return l

    def add_modifier(self, name, attributes, time=None):
        """Register a modifier with ``self.modifiers``."""
        self.modifiers.add_item(name, attributes, time)

    def count_modifiers(self, key):
        """Return the modifier total for ``key``.

        Reads ``modifiers`` straight from ``__dict__``, bypassing the
        customised attribute lookup.
        """
        val = self.__dict__['modifiers'].get_modified_attribute(key)
        return val

    @property
    def focus(self):
        """Focus of the focused skill, or 0 when there is none."""
        try:
            return self.focused_skill.focus
        except AttributeError:
            return 0

    @property
    def job(self):
        """Name of the scheduled job, or 'idle'."""
        job = self.schedule.find_by_slot('job')
        if job == None:
            return 'idle'
        else:
            return job.name

    @property
    def minor(self):
        """Name of the scheduled minor activity, or 'idle'."""
        minor = self.schedule.find_by_slot('minor')
        if minor == None:
            return 'idle'
        else:
            return minor.name

    def show_job(self):
        """Return a printable description of the job and its special values."""
        job = self.schedule.find_by_slot('job')
        if not job:
            return 'idle'
        else:
            s = ''
            for k, v in job.special_values.items():
                s += '%s: ' % (k)
                try:
                    l = [i for i in v]
                    try:
                        for i in l:
                            s += '%s, ' % (i.name())
                    except AttributeError:
                        for i in l:
                            s += '%s, ' % (i)
                except TypeError:
                    # v is not iterable: show it as a single value
                    try:
                        s += '%s, ' % (v.name())
                    except AttributeError:
                        s += '%s, ' % (v)
                # list() keeps the indexing valid where items() is a view
                if k not in list(job.special_values.items())[-1]:
                    s += '\n'
            return '%s, %s' % (job.name, s)

    def job_object(self):
        """Return the scheduled job object, or None."""
        job = self.schedule.find_by_slot('job')
        if not job:
            return None
        else:
            return job

    def __getattribute__(self, key):
        """Look ``key`` up on ``self.genus`` first, then on the person.

        Only names that neither start nor end with '__' are delegated.
        On a successful genus lookup ``genus.last_caller`` is set to self.
        """
        if not key.startswith('__') and not key.endswith('__'):
            try:
                genus = super(Person, self).__getattribute__('genus')
                value = getattr(genus, key)
                genus.last_caller = self
                return value
            except AttributeError:
                pass
        return super(Person, self).__getattribute__(key)

    def __getattr__(self, key):
        """Fallback lookup: base attributes, then needs.

        A base attribute is returned with modifiers applied and clamped to
        the range 1..5.  Otherwise the need named ``key`` is returned.
        Raises AttributeError when neither exists.
        """
        if key in self.attributes:
            value = self.attributes[key]
            value += self.count_modifiers(key)
            if value < 1:
                value = 1
            if value > 5:
                value = 5
            return value
        n = self.get_all_needs()
        if key in n.keys():
            return n[key]
        else:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        """Set an attribute; writes to base attributes are also stored in
        ``self.attributes`` with modifiers subtracted (floored at 0)."""
        # The __dict__ check avoids touching self.attributes before
        # __init__ has created it.
        if 'attributes' in self.__dict__:
            if key in self.attributes:
                value -= self.count_modifiers(key)
                self.attributes[key] = value
                if self.attributes[key] < 0:
                    self.attributes[key] = 0
        super(Person, self).__setattr__(key, value)

    @property
    def determination(self):
        """Determination points; never negative."""
        return self._determination

    @determination.setter
    def determination(self, value):
        self._determination = value
        if self._determination < 0:
            self._determination = 0

    @property
    def anxiety(self):
        """Anxiety points; never negative."""
        return self._anxiety

    @anxiety.setter
    def anxiety(self, value):
        self._anxiety = value
        if self._anxiety < 0:
            # Clamp the stored value (was assigned to a stray local name).
            self._anxiety = 0

    def modifiers_separate(self, modifier, names=False):
        """Return the individual modifier entries for ``modifier``."""
        return self.modifiers.get_modifier_separate(modifier, names)

    def vitality_info(self):
        """Return (dict of vitality inputs, separate 'vitality' modifiers)."""
        d = {'physique': self.physique,
             'shape': self.count_modifiers('shape'),
             'fitness': self.count_modifiers('fitness'),
             'mood': self.mood,
             'therapy': self.count_modifiers('therapy')}
        l = self.modifiers_separate('vitality', True)
        return d, l

    @property
    def vitality(self):
        """Vitality score, capped at 5.

        Each negative input cancels the smallest remaining positive input;
        if there are more negatives than positives the result is 0.  The
        remaining positives raise the score one step at a time while they
        exceed it, then ``self._vitality`` is added.
        """
        l = [self.physique, self.count_modifiers('shape'),
             self.count_modifiers('fitness'), self.mood,
             self.count_modifiers('therapy')]
        l += self.modifiers_separate('vitality')
        l = [i for i in l if i != 0]
        lgood = []
        lbad = []
        for i in l:
            if i > 0:
                lgood.append(i)
            elif i < 0:
                lbad.append(i)
        val = 0
        bad = len(lbad)
        lgood.sort()
        for i in range(bad):
            try:
                lgood.pop(0)
            except IndexError:
                return 0
        while len(lgood) > 0:
            num = min(lgood)
            if num > val:
                val += 1
            lgood.remove(num)
        val += self._vitality
        if val > 5:
            val = 5
        return val

    @property
    def gender(self):
        """Name of the feature in the 'gender' slot, or None."""
        try:
            gender = self.feature_by_slot('gender').name
            return gender
        except AttributeError:
            return None

    @property
    def age(self):
        """Name of the feature in the 'age' slot, or None."""
        try:
            age = self.feature_by_slot('age').name
            return age
        except AttributeError:
            return None

    def phobias(self):
        """Return the objects of fear of all Phobia features."""
        l = []
        for feature in self.features:
            if isinstance(feature, Phobia):
                l.append(feature.object_of_fear)
        return l

    def get_needs(self):
        """Return {name: need} for needs whose level is above 0."""
        d = {}
        for need in self._needs:
            if need.level > 0:
                d[need.name] = need
        return d

    def get_all_needs(self):
        """Return {name: need} for every need."""
        d = {}
        for need in self._needs:
            d[need.name] = need
        return d

    def show_taboos(self):
        """Return 'name(value), ' for every taboo with a non-zero value."""
        s = ""
        for taboo in self.taboos:
            if taboo.value != 0:
                s += "{taboo.name}({taboo.value}), ".format(taboo=taboo)
        return s

    def show_needs(self):
        """Return 'name(level), ' for every active need."""
        s = ""
        for need in self.get_needs().values():
            s += "{need.name}({need.level}), ".format(need=need)
        return s

    def show_features(self):
        """Return the names of all visible features, comma separated."""
        s = ""
        for feature in self.features:
            if feature.visible:
                s += "{feature.name}, ".format(feature=feature)
        return s

    def show_focus(self):
        """Return the focused skill's name, or a placeholder text."""
        if isinstance(self.focused_skill, Skill):
            return self.focused_skill.name
        else:
            return "No focused skill"

    def show_skills(self):
        """Return a comma separated summary of all skills."""
        s = ""
        for skill in self.skills:
            s += "{name}({skill.level}, {skill.attribute}({value}))".format(
                name=skill.name, skill=skill, value=skill.attribute_value())
            if skill != self.skills[len(self.skills) - 1]:
                s += ', '
        return s

    def show_mood(self):
        """Return the mood as 'Label(value)'."""
        m = {-1: '!!!CRUSHED!!!', 0: 'Gloomy', 1: 'Tense', 2: 'Content',
             3: 'Serene', 4: 'Jouful', 5: 'Enthusiastic'}
        mood = self.mood
        return "{mood}({val})".format(mood=m[mood], val=mood)

    def show_attributes(self):
        """Return 'name(value)' for every base attribute, concatenated."""
        s = ""
        for key in self.attributes.keys():
            s += "{0}({1})".format(key, getattr(self, key))
        return s

    def show_tokens_difficulty(self):
        """Return 'token(difficulty), ' for every entry of tokens_difficulty."""
        s = ""
        for key, value in self.tokens_difficulty.items():
            s += "{0}({1}), ".format(key, value)
        return s

    def name(self):
        """Return 'firstname surname'."""
        s = self.firstname + " " + self.surname
        return s

    def taboo(self, name):
        """Return the taboo called ``name``, or an error string if absent."""
        for t in self.taboos:
            if t.name == name:
                return t
        return "No taboo named %s" % (name)

    def skill(self, skillname):
        """Return the skill called ``skillname``, creating it on first use.

        Raises Exception when the name is not present in ``skills_data``.
        """
        skill = None
        for i in self.skills:
            if i.name == skillname:
                skill = i
                return skill
        if skillname in skills_data:
            skill = Skill(self, skillname, skills_data[skillname])
            self.skills.append(skill)
            return skill
        else:
            raise Exception("No skill named %s in skills_data" % (skillname))

    def tick_features(self):
        """Advance time on every feature."""
        for feature in self.features:
            feature.tick_time()

    def use_skill(self, name):
        """Record that a skill (object or name) was used."""
        if isinstance(name, Skill):
            self.skills_used.append(name)
        else:
            self.skills_used.append(self.skill(name))

    def get_used_skills(self):
        """Return the used skills as Skill objects."""
        l = []
        for skill in self.skills_used:
            if isinstance(skill, Skill):
                l.append(skill)
            else:
                l.append(self.skill(skill))
        return l

    def calc_focus(self):
        """Update the focused skill from the skills used since last call.

        If the focused skill was used again its focus grows by one.
        Otherwise its focus is reset and the most used skill (random among
        ties) becomes focused; with no skills used the focus is cleared.
        """
        if self.focused_skill:
            if self.focused_skill in self.get_used_skills():
                self.focused_skill.focus += 1
                self.skills_used = []
                return
        try:
            self.focused_skill.focus = 0
        except AttributeError:
            pass
        if len(self.skills_used) > 0:
            from collections import Counter
            counted = Counter()
            for skill in self.get_used_skills():
                counted[skill.name] += 1
            maximum = max(counted.values())
            result = []
            for skill in counted:
                if counted[skill] == maximum:
                    result.append(skill)
            self.skill(choice(result)).set_focus()
        else:
            self.focused_skill = None
        self.skills_used = []

    def recalculate_mood(self):
        """Recompute ``self.mood`` from need satisfaction and tension.

        Satisfactions (plus determination) are weighed against
        disappointments (plus anxiety); the shorter list cancels the
        smallest entries of the longer one.  Afterwards the satisfaction
        and tension of every need that took part are reset.
        """
        mood = 0
        happines = []
        dissapointment = []
        dissapointments_inf = []
        satisfactions_inf = collections.defaultdict(list)
        determination = []
        anxiety = []
        for need in self.get_needs().values():
            if need.tension and need.level > 0:
                dissapointment.append(need.level)
                dissapointments_inf.append(need)
            if need.satisfaction > 0:
                happines.append(need.satisfaction)
                satisfactions_inf[need.satisfaction].append(need)
                if need.level == 3:
                    # level-3 needs count twice
                    happines.append(need.satisfaction)
                    satisfactions_inf[need.satisfaction].append(need)
        for i in range(self.determination):
            happines.append(1)
            determination.append('determination')
        for i in range(self.anxiety):
            dissapointment.append(1)
            anxiety.append('anxiety')
        hlen = len(happines)
        dlen = len(dissapointment)
        happines.sort()
        dissapointment.sort()
        renpy.call_in_new_context('mood_recalc_result', dissapointments_inf,
                                  satisfactions_inf, determination, anxiety,
                                  True, self)
        if hlen > dlen:
            dissapointment = []
            for i in range(dlen):
                happines.pop(0)
            threshold = happines.count(5)
            sens = 5 - self.sensitivity
            if threshold > sens:
                mood = 5
            elif threshold + happines.count(4) > sens:
                mood = 4
            elif threshold + happines.count(4) + happines.count(3) > sens:
                mood = 3
            elif threshold + happines.count(4) + happines.count(3) + happines.count(2) > sens:
                mood = 2
            elif threshold + happines.count(4) + happines.count(3) + happines.count(2) + happines.count(1) > sens:
                mood = 1
        elif hlen < dlen:
            axniety_holder = self.anxiety
            happines = []
            for i in range(hlen):
                dissapointment.pop(0)
            dissapointment = [i for i in dissapointment if i > 1]
            despair = 6 - self.sensitivity - dissapointment.count(2)
            despair2 = dissapointment.count(3)
            if despair < 0:
                if abs(despair) > self.anxiety:
                    self.anxiety += 1
                    mood = -1
            else:
                despair2 -= despair
            if despair2 > 0:
                self.anxiety += despair2
                mood = -1
        else:
            mood = 0
        for key in satisfactions_inf:
            for need in satisfactions_inf[key]:
                need.satisfaction = 0
                need.tension = False
        for need in dissapointments_inf:
            need.satisfaction = 0
            need.tension = False
        self.mood = mood

    def motivation(self, skill=None, tense_needs=[], satisfy_needs=[], beneficiar=None, morality=0, special=[]):
        """Return the motivation (0..5) for an action.

        ``tense_needs`` / ``satisfy_needs`` are need names; ``special`` is
        an iterable of extra numeric bonuses.  The default lists are never
        mutated here.
        """
        # needs should be a list of tuples[(need, shift)]
        motiv = 0
        motiv += morality
        for i in special:
            motiv += i
        if skill:
            if self.skill(skill).talent:
                motiv += 1
            elif self.skill(skill).inability:
                motiv -= 1
        intense = []
        self_needs = self.get_needs()
        for need in tense_needs:
            if need in self_needs.keys():
                motiv -= 1
        for need in satisfy_needs:
            if need in self_needs.keys():
                intense.append(self_needs[need].level)
        try:
            maximum = max(intense)
        except ValueError:
            maximum = 0
        motiv += maximum
        if beneficiar:
            if beneficiar == self:
                motiv += 2
            else:
                motiv += self.stance(beneficiar).value
                # NOTE(review): compares the Stance object itself with 0;
                # possibly `.value` was intended -- confirm against Stance.
                if self.stance(beneficiar) < 0:
                    motiv = 0
            if beneficiar == self.master or beneficiar == self.supervisor:
                if self.stance(beneficiar).value == 0:
                    motiv = min(beneficiar.mind, beneficiar.spirit)
                elif self.stance(beneficiar).value == 2:
                    motiv = 5
        if motiv < 0:
            motiv = 0
        if motiv > 5:
            motiv = 5
        return motiv

    def add_feature(self, name):
        """Add a feature; if mutually exclusive the old feature is removed."""
        Feature(self, name)

    def add_phobia(self, name):
        """Add a phobia feature."""
        Phobia(self, name)

    def feature_by_slot(self, slot):
        """Return the feature occupying ``slot``, or None."""
        for f in self.features:
            if f.slot == slot:
                return f
        return None

    def feature(self, name):
        """Return the feature called ``name``, or None."""
        for f in self.features:
            if f.name == name:
                return f
        return None

    def remove_feature(self, feature):
        """Remove a feature given by name (str) or as a Feature object."""
        if isinstance(feature, str):
            for f in self.features:
                if f.name == feature:
                    f.remove()
        else:
            i = self.features.index(feature)
            self.features[i].remove()
        return

    def remove_feature_by_slot(self, slot):
        """Remove every feature occupying ``slot``."""
        for f in self.features:
            if f.slot == slot:
                f.remove()

    def description(self):
        """Return the full name with nickname, then the feature names."""
        txt = self.firstname + ' "' + self.nickname + '" ' + self.surname
        txt += '\n'
        for feature in self.features:
            txt += feature.name
            txt += ','
        return txt

    def reset_needs(self):
        """Reset every need."""
        for need in self.get_all_needs().values():
            need.reset()

    def rest(self):
        """Run the end-of-turn updates in their fixed order."""
        self.conditions = []
        self.modifiers.tick_time()
        self.tick_features()
        self.schedule.use_actions()
        self.fatness_change()
        self.recalculate_mood()
        self.reset_needs()
        self.calc_focus()
        self.reduce_esteem()

    def food_demand(self):
        """
        Evaluate optimal food consumption to maintain current weight.
        :return: demand, at least 1
        """
        demand = self.physique
        demand += self.appetite
        demand += self.count_modifiers('food_demand')
        if demand < 1:
            demand = 1
        return demand

    def food_desire(self):
        """
        Evaluate amount of food character likes to consume.
        :return: desire, at least 1
        """
        desire = self.food_demand()
        if self.nutrition.level == 0:
            desire -= 1
        elif self.nutrition.level == 3:
            desire += 1
        if self.feature('obese'):
            desire -= 1
        elif self.feature('emaciated'):
            desire += 2
        elif self.feature('slim'):
            desire += 1
        desire += self.count_modifiers("food_desire")
        if desire < 1:
            desire = 1
        return desire

    def get_food_consumption(self, show_multi=False):
        """Return consumed food weighted by the food type's multiplier.

        With ``show_multi`` a (value, food_type) tuple is returned.
        """
        types = {'sperm': 0, 'forage': 0, 'dry': 1, 'canned': 2, 'cousine': 2}
        value = self.consume_food()
        multiplier = types[self.ration['food_type']]
        if show_multi:
            return value * multiplier, self.ration['food_type']
        return value * multiplier

    def consume_food(self):
        """Return the amount of food eaten under the current ration."""
        food_consumed = self.food_desire()
        fatness = self.feature_by_slot('shape')
        if fatness:
            fatness = fatness.name
        # None stands for "no shape feature" (normal build)
        flist = ['emaciated', 'slim', None, 'chubby', 'obese']
        val = flist.index(fatness)
        if self.ration['amount'] == 'starvation':
            food_consumed = 0
        if self.ration['amount'] == 'limited':
            if food_consumed > self.ration["limit"]:
                food_consumed = self.ration["limit"]
        if self.ration['amount'] == 'regime':
            food_consumed = self.food_demand()
            if self.ration['target'] > val:
                food_consumed += 1 + self.appetite
            if self.ration['target'] < val:
                food_consumed = self.food_demand() - 1
            if self.ration['target'] == val:
                food_consumed = self.food_demand()
        return food_consumed

    def fatness_change(self):
        """Apply this turn's calorie balance to nutrition and body shape.

        Returns 'fatness -' / 'fatness +' when the shape changed, else None.
        """
        consumed = self.consume_food()
        demand = self.food_demand()
        desire = self.food_desire()
        calorie_difference = consumed - demand
        if consumed < desire:
            self.nutrition.set_tension()
        if self.ration['amount'] != 'starvation':
            d = {'sperm': -4, 'forage': -1, 'dry': -2, 'canned': 0, 'cousine': 3}
            if d[self.ration['food_type']] < 0:
                self.nutrition.set_tension()
            else:
                self.nutrition.satisfaction = d[self.ration['food_type']]
        self.calorie_storage += calorie_difference
        fatness = self.feature_by_slot('shape')
        if fatness != None:
            fatness = fatness.name
        flist = ['emaciated', 'slim', None, 'chubby', 'obese']
        ind = flist.index(fatness)
        if self.calorie_storage <= 0:
            self.remove_feature('dyspnoea')
        if self.calorie_storage >= 0:
            self.remove_feature('starving')
        if self.calorie_storage < 0:
            chance = randint(-10, -1)
            if self.calorie_storage <= chance:
                ind -= 1
                if self.feature('dyspnoea'):
                    self.remove_feature('dyspnoea')
                if ind < 0:
                    ind = 0
                    if self.feature('starving'):
                        self.add_feature('dead')
                    else:
                        self.add_feature('starving')
                f = flist[ind]
                if f:
                    self.add_feature(f)
                else:
                    self.feature_by_slot('shape').remove()
                if not self.feature('starving'):
                    self.calorie_storage = 0
                return 'fatness -'
        if self.calorie_storage > 0:
            chance = randint(1, 10)
            if self.calorie_storage >= chance:
                ind += 1
                if ind > 4:
                    ind = 4
                    if self.feature('dyspnoea'):
                        self.add_feature('diabetes')
                    else:
                        self.add_feature('dyspnoea')
                f = flist[ind]
                if f:
                    self.add_feature(f)
                else:
                    self.feature_by_slot('shape').remove()
                if not self.feature('dyspnoea'):
                    self.calorie_storage = 0
                return 'fatness +'

    def nutrition_change(self, food_consumed):
        """Track under-feeding in ``self.ration['overfeed']``."""
        if food_consumed < self.food_demand():
            self.ration["overfeed"] -= 1
            chance = randint(-10, -1)
            if self.ration["overfeed"] <= chance:
                self.ration["overfeed"] = 0
        return

    def know_person(self, person):
        """Return True when ``person`` is among the known characters."""
        if person in self.known_characters:
            return True
        return False

    def _set_relations(self, person):
        """Create a Relations object shared by self and ``person``."""
        relations = Relations(self, person)
        person._relations.append(relations)
        self._relations.append(relations)
        return relations

    def relations(self, person):
        """Return the relations with ``person``, creating them if needed.

        A Fraction is resolved to its owner.  Raises Exception when
        ``person`` is self.
        """
        if person == self:
            raise Exception("relations: target and caller is same person")
        if isinstance(person, Fraction):
            return self.relations(person.owner)
        if not self.know_person(person):
            relations = self._set_relations(person)
            self._set_stance(person)
            return relations
        for rel in self._relations:
            if self in rel.persons and person in rel.persons:
                return rel

    def _set_stance(self, person):
        """Create a Stance object shared by self and ``person``."""
        stance = Stance(self, person)
        self._stance.append(stance)
        person._stance.append(stance)
        return stance

    def stance(self, person):
        """Return the stance towards ``person``, creating it if needed.

        The stance type is refreshed to 'master', 'slave' or 'neutral'
        on every call.  Raises Exception when ``person`` is self.
        """
        if person == self:
            raise Exception("stance: target and caller is same person")
        if isinstance(person, Fraction):
            return self.stance(person.owner)
        elif not self.know_person(person):
            self._set_relations(person)
            stance = self._set_stance(person)
        else:
            for s in self._stance:
                if self in s.persons and person in s.persons:
                    stance = s
        if person in self.slaves:
            stance._type = 'master'
        elif person == self.master:
            stance._type = 'slave'
        else:
            stance._type = 'neutral'
        return stance

    def use_token(self, token):
        """Spend ``token``; returns an error string when it is not held."""
        if self.has_token(token):
            self.tokens.remove(token)
        else:
            return "%s has no token named %s" % (self.name(), token)

    def has_token(self, token):
        """Return True when ``token`` is held."""
        if token in self.tokens:
            return True
        return False

    def has_any_token(self):
        """Return True when at least one token is held."""
        if len(self.tokens) > 0:
            return True
        return False

    def add_token(self, token, free=False):
        """Give the person ``token`` unless it is already held."""
        if not self.has_token(token):
            self.tokens.append(token)
            if token not in ('accordance', 'antagonism'):
                if not free:
                    self.player_relations().stability += 1
                self.relations_tendency[token] += 1
            renpy.call_in_new_context('lbl_notify', self, token)

    def player_relations(self):
        """Return the relations object involving the player, or None."""
        for rel in self._relations:
            if rel.is_player_relations():
                return rel
        return None

    def moral_action(self, *args, **kwargs):
        """Adjust self-esteem for an action.

        If any positional argument is an int it is added to self-esteem
        and None is returned; otherwise the result of ``check_moral`` is
        added and returned.
        """
        for arg in args:
            if isinstance(arg, int):
                self.selfesteem += arg
                return
        result = self.check_moral(*args, **kwargs)
        self.selfesteem += result
        return result

    def check_moral(self, *args, **kwargs):
        """Score an action against the person's alignment."""
        result = 0
        act = {'ardent': 1, 'reasonable': 0, 'timid': -1}
        moral = {'good': 1, 'selfish': 0, 'evil': -1}
        order = {'lawful': 1, 'conformal': 0, 'chaotic': -1}
        action_tones = {'activity': None, 'morality': None, 'orderliness': None}
        activity = None
        morality = None
        orderliness = None
        target = None
        if 'target' in kwargs:
            if isinstance(kwargs['target'], Person):
                target = kwargs['target']
        else:
            for arg in args:
                if isinstance(arg, Person):
                    target = arg
        for arg in args:
            if arg in act.keys():
                activity = arg
            if arg in moral.keys():
                morality = arg
            if arg in order.keys():
                orderliness = arg
        # NOTE(review): action_tones is never filled from activity /
        # morality / orderliness, so the loop below never scores anything
        # and the method currently always returns 0 -- confirm intent.
        for k, v in action_tones.items():
            if v:
                valself = getattr(self.alignment, k)
                valact = v
                if valself != 0:
                    if valself + valact == 0:
                        result -= 1
                    elif abs(valself + valact) == 2:
                        result += 1
                elif target:
                    if valact != 0:
                        if getattr(self.relations(target), Alignment.relation_binding[k]) != valact:
                            result -= 1
                        else:
                            result += 1
        return result

    def reduce_esteem(self):
        """Move self-esteem towards zero by ``5 - sensitivity``."""
        if self.selfesteem == 0:
            return
        val = 5 - self.sensitivity
        # NOTE(review): the checks below clamp the local `val`, which has
        # no effect; clamping self.selfesteem at 0 may have been intended.
        if self.selfesteem > 0:
            self.selfesteem -= val
            if val < 0:
                val = 0
        elif self.selfesteem < 0:
            self.selfesteem += val
            if val > 0:
                val = 0

    def enslave(self, target):
        """Make ``target`` a slave of this person."""
        target.master = self
        target.supervisor = self
        self.slaves.append(target)
        self.relations(target)

    def set_supervisor(self, supervisor):
        """Set the supervisor."""
        self.supervisor = supervisor

    def master_stance(self, target):
        """Return the index (0..3) of the stance level towards ``target``.

        Raises Exception for player-controlled persons.
        """
        if self.player_controlled:
            raise Exception('master_stance is only for npc')
        stance = self.stance(target).level
        l = ['cruel', 'opressive', 'rightful', 'benevolent']
        ind = l.index(stance)
        return ind

    def desirable_relations(self):
        """Map each alignment description entry to a (relation, type)
        tuple; entries without a mapping yield None."""
        d = {'lawful': ('formal', 'loyality'),
             'chaotic': ('intimate', 'scum-slave'),
             'timid': ('delicate', 'worship'),
             'ardent': ('intense', 'disciple'),
             'good': ('supporter', 'dedication'),
             'evil': ('contradictor', 'henchman')}
        return [d.get(x) for x in self.alignment.description()]

    def willing_available(self):
        """Return the willing-slave types currently available, or []."""
        if not self.master:
            return []
        rel_check = False
        rel = self.desirable_relations()
        types = [x[1] for x in rel if isinstance(x, tuple)]
        check = [x[0] for x in rel if isinstance(x, tuple)]
        for rel in self.relations(self.master).description():
            if rel in check:
                rel_check = True
                break
        if self.stance(self.master).respect() < self.spirit:
            rel_check = False
        if not self.has_token('accordance'):
            rel_check = False
        if rel_check:
            return types
        else:
            return []

    def attitude_tendency(self):
        """Return the token with the strictly highest tendency, else None."""
        n = 0
        token = None
        for k, v in self.relations_tendency.items():
            if v > n:
                n = v
                token = k
        # list() keeps .count() available where values() is a view
        if list(self.relations_tendency.values()).count(n) > 1:
            return None
        return token

    def add_condition(self, condition):
        """Add ``condition`` unless already present."""
        if not self.has_condition(condition):
            self.conditions.append(condition)

    def has_condition(self, condition):
        """Return True when ``condition`` is present."""
        if condition in self.conditions:
            return True
        return False

    def remove_condition(self, condition):
        """Remove ``condition``; a missing condition is ignored."""
        try:
            self.conditions.remove(condition)
        except ValueError:
            pass
class Analysis(object):
    """Performs the analysis and collects the results"""

    def __init__(self, cfg, force_restart, threads):
        """Validate the configuration, prepare the output folders and
        database, then build the source alignment and the starting tree.

        ``threads == -1`` means "use the CPU count reported by threadpool".
        """
        the_config.validate()

        # TODO: Remove -- put this all into "options"
        if threads == -1:
            threads = threadpool.get_cpu_count()

        self.threads = threads

        # TODO: Move these to the config validate and prepare
        log.info("Beginning Analysis")
        self.process_restart(force_restart)

        # Make some folders for the analysis
        the_config.make_output_folders()
        the_config.database = Database(the_config)

        # Check for old analyses to see if we can use the old data
        the_config.check_for_old_config()

        # TODO: This is going to be in "Prepare"
        self.make_alignment(cfg.alignment_path)
        self.make_tree(cfg.user_tree_topology_path)

        # We need this to block the threads for critical stuff
        self.lock = threading.Condition(threading.Lock())

        # Store the result in here
        self.results = results.AnalysisResults(the_config.model_selection)

    def process_restart(self, force_restart):
        """Delete previous output.

        With ``force_restart`` the whole output folder is removed;
        otherwise only the schemes and phylofiles folders are.
        """
        if force_restart:
            # Remove everything
            if os.path.exists(the_config.output_path):
                log.warning("Deleting all previous workings in '%s'" %
                            the_config.output_path)
                shutil.rmtree(the_config.output_path)
        else:
            # Remove the schemes folder, and clean out the phylofiles folder
            if os.path.exists(the_config.schemes_path):
                log.debug("Removing files in '%s'" % the_config.schemes_path)
                shutil.rmtree(the_config.schemes_path)
            if os.path.exists(the_config.phylofiles_path):
                log.debug("Removing files in '%s'" %
                          the_config.phylofiles_path)
                shutil.rmtree(the_config.phylofiles_path)

    def analyse(self):
        """Run ``do_analysis`` and return the results; the database is
        closed even when the analysis raises."""
        try:
            self.do_analysis()
        finally:
            # TODO: Not really the right place for it?
            the_config.database.close()
        return self.results

    def make_alignment(self, source_alignment_path):
        """Read the source alignment and store a copy as 'source.phy'.

        If a copy from a previous run exists it must match the current
        alignment (content and species names); otherwise AnalysisError is
        raised.
        """
        # Make the alignment
        self.alignment = Alignment()
        self.alignment.read(source_alignment_path)

        # TODO REMOVE -- this should be part of the checking procedure
        # We start by copying the alignment
        self.alignment_path = os.path.join(the_config.start_tree_path,
                                           'source.phy')
        if os.path.exists(self.alignment_path):
            # Make sure it is the same
            old_align = Alignment()
            old_align.read(self.alignment_path)
            if not old_align.same_as(self.alignment):
                log.error("""Alignment file has changed since previous run. You need to use the force-restart option.""")
                raise AnalysisError

            # Species are compared as multisets: order does not matter
            old_species = collections.Counter(old_align.species)
            new_species = collections.Counter(self.alignment.species)
            if old_species != new_species:
                log.error(
                    """Species names in alignment have changed since previous run. You need to use the force-restart option.""")
                raise AnalysisError
        else:
            self.alignment.write(self.alignment_path)

    def need_new_tree(self, tree_path):
        """Return True when no complete starting tree exists at
        ``tree_path`` (a tree file is complete if it contains ';')."""
        if os.path.exists(tree_path):
            # Close the handle explicitly instead of relying on GC
            with open(tree_path) as tree_file:
                tree_text = tree_file.read()
            if ';' in tree_text:
                log.info("Starting tree file found.")
                redo_tree = False
            else:
                log.info("Starting tree file found but it is incomplete. \nRe-estimating")
                redo_tree = True
        else:
            log.info("Starting tree will be estimated from the data.")
            redo_tree = True

        return redo_tree

    def make_tree(self, user_path):
        """Write the filtered alignment and obtain a starting tree.

        An existing complete tree is reused.  Otherwise the topology comes
        from ``user_path`` if given, else from the processor
        (``no_ml_tree`` True) or from RAxML (``no_ml_tree`` False); branch
        lengths are estimated unless the RAxML tree is used.  The result
        is stored in ``self.tree_path``.
        """
        # Begin by making a filtered alignment, containing ONLY those columns
        # that are defined in the subsets
        subset_with_everything = subset_ops.merge_subsets(
            the_config.user_subsets)
        self.filtered_alignment = SubsetAlignment(self.alignment,
                                                  subset_with_everything)
        self.filtered_alignment_path = os.path.join(
            the_config.start_tree_path, 'filtered_source.phy')
        self.filtered_alignment.write(self.filtered_alignment_path)

        # Check the full subset against the alignment
        subset_ops.check_against_alignment(subset_with_everything,
                                           self.alignment, the_config)

        # We start by copying the alignment
        self.alignment_path = os.path.join(the_config.start_tree_path,
                                           'source.phy')

        # Now check for the tree
        tree_path = the_config.processor.make_tree_path(
            self.filtered_alignment_path)

        if self.need_new_tree(tree_path):
            log.debug("Estimating new starting tree, no old tree found")

            # If we have a user tree, then use that, otherwise, create a
            # topology
            util.clean_out_folder(the_config.start_tree_path,
                                  keep=["filtered_source.phy", "source.phy"])

            if user_path is not None and user_path != "":
                # Copy it into the start tree folder
                log.info("Using user supplied topology at %s" % user_path)
                topology_path = os.path.join(the_config.start_tree_path,
                                             'user_topology.phy')
                util.dupfile(user_path, topology_path)
                need_bl = True
            elif the_config.no_ml_tree == True:
                log.debug("didn't find tree at %s, making a new one" %
                          tree_path)
                topology_path = the_config.processor.make_topology(
                    self.filtered_alignment_path, the_config.datatype,
                    the_config.cmdline_extras)
                need_bl = True
            elif the_config.no_ml_tree == False:
                log.debug(
                    "didn't find tree at %s, making an ML tree with RAxML" %
                    tree_path)

                tree_scheme = scheme.create_scheme(
                    the_config, "tree_scheme",
                    range(len(the_config.user_subsets)))

                topology_path = raxml.make_ml_topology(
                    self.filtered_alignment_path, the_config.datatype,
                    the_config.cmdline_extras, tree_scheme, self.threads)

                # here we copy the ML tree topology so it can be used with
                # PhyML too
                # TODO: this is a hack, and it would be better to decide on
                # a universal name for the different types of tree we might
                # have.
                phyml_tree = os.path.join(
                    os.path.dirname(topology_path),
                    "filtered_source.phy_phyml_tree.txt")
                copyfile(topology_path, phyml_tree)

                need_bl = False

            if need_bl == True:
                # Now estimate branch lengths
                tree_path = the_config.processor.make_branch_lengths(
                    self.filtered_alignment_path, topology_path,
                    the_config.datatype, the_config.cmdline_extras)

        self.tree_path = tree_path
        log.debug("Starting tree with branch lengths is here: %s" %
                  self.tree_path)

    def run_task(self, model_name, sub):
        """Analyse one model on one subset and record the result.

        An ExternalProgramError is re-raised unless
        ``the_config.suppress_errors`` is set, in which case a fabricated
        result is stored instead.
        """
        # This bit should run in parallel (forking the processor)
        try:
            the_config.processor.analyse(model_name, sub.alignment_path,
                                         self.tree_path,
                                         the_config.branchlengths,
                                         the_config.cmdline_extras)
            fabricate = False
        except ExternalProgramError:
            if not the_config.suppress_errors:
                # In the Kmeans algorithm we suppress errors and "fabricate"
                # subsets (we assume the error is because the subset is too
                # small for analysis)
                raise

            # If it is kmeans we assume that the error is because the subset
            # is too small or unanalysable, so we fabricate it
            log.debug("New subset could not be analysed. It will be merged "
                      "at the end of the analysis")
            fabricate = True

        # Not entirely sure that WE NEED to block here, but it is safer to do
        # It shouldn't hold things up toooo long...
        with self.lock:
            if fabricate:
                sub.fabricate_model_result(the_config, model_name)
            else:
                sub.parse_model_result(the_config, model_name)

            # Try finalising, then the result will get written out earlier...
            sub.finalise(the_config)

    def add_tasks_for_sub(self, tasks, sub):
        """Append one (run_task, (model, sub)) entry per model still to be
        processed for ``sub``."""
        for m in sub.models_to_process:
            tasks.append((self.run_task, (m, sub)))

    def run_concurrent(self, tasks):
        """Run the tasks one after another in the calling thread."""
        for func, args in tasks:
            log.debug("About to analyse subset %s", args[1].name)
            func(*args)

    def run_threaded(self, tasks):
        """Run the tasks on a thread pool and wait for completion."""
        if not tasks:
            return
        pool = threadpool.Pool(tasks, self.threads)
        pool.join()

    def analyse_list_of_subsets(self, all_subsets):
        """Analyse every subset in ``all_subsets``, in chunks.

        The list is sorted in place, largest subset first.  Raises
        AnalysisError when a subset cannot be finalised afterwards.
        """
        # get a whole list of subsets analysed in parallel

        # analyse bigger subsets first, for efficiency
        # (stable descending sort; also safe for a subset with no columns)
        all_subsets.sort(key=lambda x: len(x.columns), reverse=True)

        # chunk the list into blocks of ~1000 tasks
        # in empirical testing, this speeds things up lot
        # though we are not entirely sure why...
        n = 1000
        n = int(n / len(the_config.models))
        if (n < 1):
            n = 1  # seems unlikely...

        log.debug("chunk size (in number of subsets) = %d", n)

        subset_chunks = [
            all_subsets[i:i + n] for i in range(0, len(all_subsets), n)
        ]

        for subsets in subset_chunks:
            # prepare the list of tasks
            tasks = []
            for sub in subsets:
                if sub.is_done:
                    pass
                elif sub.is_prepared:
                    self.add_tasks_for_sub(tasks, sub)
                else:
                    sub.prepare(the_config, self.alignment)
                    self.add_tasks_for_sub(tasks, sub)
            if tasks:
                # Now do the analysis
                if self.threads == 1:
                    self.run_concurrent(tasks)
                else:
                    self.run_threaded(tasks)

        # Now see if we're done
        for sub in all_subsets:
            # ALL subsets should already be finalised in the task. We just
            # check again here
            if not sub.finalise(the_config):
                log.error("Failed to run models %s; not sure why" %
                          ", ".join(list(sub.models_not_done)))
                raise AnalysisError

    def analyse_scheme(self, sch):
        """Analyse the unfinished subsets of ``sch``, store and return its
        SchemeResult."""
        # Progress
        the_config.progress.next_scheme()

        # analyse the subsets in the scheme that aren't done
        # NB for most schemes we will have all subsets done, so this saves
        # time
        not_done = []
        for sub in sch:
            if sub.is_done == False:
                not_done.append(sub)
        if not_done:
            self.analyse_list_of_subsets(not_done)

        # AIC needs the number of sequences
        number_of_seq = len(self.alignment.species)
        result = scheme.SchemeResult(sch, number_of_seq,
                                     the_config.branchlengths,
                                     the_config.model_selection)
        self.results.add_scheme_result(sch, result)

        return result
import sys
from alignment import Alignment

# Command-line driver: read an alignment job from the file named by the first
# command-line argument, run it, and write a short summary to results.txt.
#
# Input file layout, as consumed below (one item per line):
#   line 1: alignment flag -- 'g' runs single_global_single_align(),
#           'l' runs local_single_align()
#   line 2: three whitespace-separated integers: match, mismatch, indel
#   line 3: first sequence
#   line 4: second sequence

input_path = sys.argv[1]
#input_path = 'test_files/test.txt'

# Read everything up front so the input handle is closed immediately.
with open(input_path) as infile:
    args = infile.readlines()

flag = args[0].rstrip()
scores = args[1].split()
match = int(scores[0])
mismatch = int(scores[1])
indel = int(scores[2])
seq1 = args[2].rstrip()
seq2 = args[3].rstrip()

if flag == 'g':
    a = Alignment(match, mismatch, indel, seq1, seq2)
    a.single_global_single_align()
    a.report_optimal_score()
elif flag == 'l':
    a = Alignment(match, mismatch, indel, seq1, seq2)
    a.local_single_align()
    a.report_optimal_score()
else:
    # No Alignment was built, so there is nothing to report.  Stop here
    # instead of truncating results.txt and then failing on the undefined
    # name `a`.
    sys.stdout.write("Invalid alignment flag.\n")
    sys.exit(1)

# The context manager guarantees the results are flushed and the handle closed.
with open('results.txt', "w") as outfile:
    outfile.write("Score:")
    outfile.write(a.get_optimal_score())
    outfile.write("\n")
    outfile.write("Number of Optimal Alignments:")
    outfile.write(a.get_total_optimal_alignments())
class Analysis(object):
    """Performs the analysis and collects the results.

    Construction validates the configuration, handles restarts, copies the
    source alignment into the start-tree folder and makes sure a starting
    tree exists.  Schemes are then analysed one at a time through
    ``analyse_scheme``.  ``analyse`` calls ``self.do_analysis()``, which is
    not defined in this class body and must be supplied elsewhere
    (presumably by a subclass).
    """

    def __init__(self, cfg, force_restart=False, threads=-1):
        """Prepare the working folders, the alignment and the starting tree.

        cfg           -- configuration object; it is validated here and then
                         kept as ``self.cfg``
        force_restart -- when true, the whole output folder is deleted first
        threads       -- stored as ``self.threads``; a value of 1 selects the
                         sequential runner, anything else the thread pool
        """
        cfg.validate()
        self.cfg = cfg
        self.threads = threads
        self.results = results.AnalysisResults(self.cfg.model_selection)

        log.info("Beginning Analysis")
        self.process_restart(force_restart)

        # Check for old analyses to see if we can use the old data
        self.cfg.check_for_old_config()

        # Make some folders for the analysis
        self.cfg.make_output_folders()
        self.make_alignment(cfg.alignment_path)
        self.make_tree(cfg.user_tree_topology_path)

        # We need this to block the threads for critical stuff
        self.lock = threading.Condition(threading.Lock())

    def process_restart(self, force_restart):
        """Remove previous output: everything, or only the schemes folder."""
        if force_restart:
            # Remove everything
            if os.path.exists(self.cfg.output_path):
                log.warning("Deleting all previous workings in '%s'",
                            self.cfg.output_path)
                shutil.rmtree(self.cfg.output_path)
        else:
            # Just remove the schemes folder
            if os.path.exists(self.cfg.schemes_path):
                log.info("Removing Schemes in '%s' (they will be recalculated from existing subset data)",
                         self.cfg.schemes_path)
                shutil.rmtree(self.cfg.schemes_path)

    def analyse(self):
        """Run ``self.do_analysis()`` and return the collected results."""
        self.do_analysis()
        return self.results

    def make_alignment(self, source_alignment_path):
        """Read the source alignment and store a copy in the start-tree folder.

        If a copy from a previous run already exists it must match the
        alignment just read; otherwise AnalysisError is raised.
        """
        # Make the alignment
        self.alignment = Alignment()
        self.alignment.read(source_alignment_path)

        # We start by copying the alignment
        self.alignment_path = os.path.join(self.cfg.start_tree_path,
                                           'source.phy')
        if os.path.exists(self.alignment_path):
            # Make sure it is the same
            old_align = Alignment()
            old_align.read(self.alignment_path)
            if not old_align.same_as(self.alignment):
                log.error("Alignment file has changed since previous run. You need to use the force-restart option.")
                raise AnalysisError
        else:
            self.alignment.write(self.alignment_path)

    def need_new_tree(self, tree_path):
        """Return True when no complete starting tree exists at tree_path.

        A tree file counts as complete when it contains a ';' character.
        """
        if not os.path.exists(tree_path):
            log.info("No starting tree file found.")
            return True

        # Read through a context manager so the handle is always closed.
        with open(tree_path) as tree_file:
            complete = ';' in tree_file.read()

        if complete:
            log.info("Starting tree file found.")
            return False

        log.info("Starting tree file found but incomplete. Re-estimating")
        return True

    def make_tree(self, user_path):
        """Make sure a starting tree with branch lengths exists.

        Writes the filtered alignment, finalises the partitions and, when no
        complete tree file is found, builds one from either the user-supplied
        topology (``user_path``) or a newly estimated topology.  The path of
        the resulting tree is stored in ``self.tree_path``.
        """
        # Begin by making a filtered alignment, containing ONLY those columns
        # that are defined in the subsets
        subset_with_everything = subset.Subset(*self.cfg.partitions)
        self.filtered_alignment = SubsetAlignment(self.alignment,
                                                  subset_with_everything)
        self.filtered_alignment_path = os.path.join(
            self.cfg.start_tree_path, 'filtered_source.phy')
        self.filtered_alignment.write(self.filtered_alignment_path)

        # Now we've written this alignment, we need to lock everything in
        # place, no more adding partitions, or changing them from now on.
        self.cfg.partitions.check_against_alignment(self.alignment)
        self.cfg.partitions.finalise()

        # Same value make_alignment() stores; kept so this method does not
        # depend on make_alignment() having been called first.
        self.alignment_path = os.path.join(self.cfg.start_tree_path,
                                           'source.phy')

        # Now check for the tree
        tree_path = self.cfg.processor.make_tree_path(
            self.filtered_alignment_path)

        if self.need_new_tree(tree_path):
            log.debug("Estimating new starting tree, no old tree found")

            # If we have a user tree, then use that, otherwise, create a
            # topology
            util.clean_out_folder(self.cfg.start_tree_path,
                                  keep=["filtered_source.phy", "source.phy"])

            if user_path is not None and user_path != "":
                # Copy it into the start tree folder
                log.info("Using user supplied topology at %s", user_path)
                topology_path = os.path.join(self.cfg.start_tree_path,
                                             'user_topology.phy')
                self.cfg.processor.dupfile(user_path, topology_path)
            else:
                log.debug("didn't find tree at %s, making a new one",
                          tree_path)
                topology_path = self.cfg.processor.make_topology(
                    self.filtered_alignment_path, self.cfg.datatype,
                    self.cfg.cmdline_extras)

            # Now estimate branch lengths
            tree_path = self.cfg.processor.make_branch_lengths(
                self.filtered_alignment_path,
                topology_path,
                self.cfg.datatype,
                self.cfg.cmdline_extras)

        self.tree_path = tree_path
        log.info("Starting tree with branch lengths is here: %s",
                 self.tree_path)

    def run_task(self, m, sub):
        """Analyse model ``m`` on subset ``sub`` and record the result."""
        # This bit should run in parallel (forking the processor)
        self.cfg.processor.analyse(
            m,
            sub.alignment_path,
            self.tree_path,
            self.cfg.branchlengths,
            self.cfg.cmdline_extras
        )

        # Not entirely sure that WE NEED to block here, but it is safer to do
        # It shouldn't hold things up toooo long...
        with self.lock:
            sub.parse_model_result(self.cfg, m)
            # Try finalising, then the result will get written out earlier...
            sub.finalise(self.cfg)

    def add_tasks_for_sub(self, tasks, sub):
        """Append one ``(run_task, (model, sub))`` entry per model to process."""
        for m in sub.models_to_process:
            tasks.append((self.run_task, (m, sub)))

    def run_concurrent(self, tasks):
        """Run the tasks one after another in the calling thread."""
        for func, args in tasks:
            func(*args)

    def run_threaded(self, tasks):
        """Run the tasks on a thread pool and wait for all of them."""
        if not tasks:
            return
        pool = threadpool.Pool(tasks, self.threads)
        pool.join()

    def analyse_scheme(self, sch):
        """Analyse every subset of scheme ``sch`` and record its result.

        Returns the SchemeResult that was added to ``self.results``.  Raises
        AnalysisError if any subset still fails to finalise after its tasks
        have run.
        """
        # Progress
        self.cfg.progress.next_scheme()

        # Prepare by reading everything in first
        tasks = []
        for sub in sch:
            sub.prepare(self.cfg, self.alignment)
            self.add_tasks_for_sub(tasks, sub)

        # Now do the analysis
        if self.threads == 1:
            self.run_concurrent(tasks)
        else:
            self.run_threaded(tasks)

        # Now see if we're done
        for sub in sch:
            # ALL subsets should already be finalised in the task. We just
            # check again here
            if not sub.finalise(self.cfg):
                log.error("Failed to run models %s; not sure why",
                          ", ".join(list(sub.models_to_do)))
                raise AnalysisError

        # AIC needs the number of sequences
        number_of_seq = len(self.alignment.species)
        result = scheme.SchemeResult(sch, number_of_seq,
                                     self.cfg.branchlengths,
                                     self.cfg.model_selection)
        self.results.add_scheme_result(sch, result)
        return result
def __init__(self, age=None, gender=None, genus='human'):
    """Create a person with default stats and register it in persons_list.

    age    -- passed to self.add_feature() once the defaults are in place
    gender -- passed to self.add_feature() once the defaults are in place
    genus  -- passed to init_genus(); its return value becomes self.genus

    NOTE: Schedule(self), init_needs(self) and init_genus(self, genus) are
    handed the partially initialised instance, so the order of the
    assignments below may matter to them -- do not reorder without checking
    those helpers.
    """
    self.player_controlled = False
    self._event_type = 'person'
    # Default name parts.
    self.firstname = u"Антон"
    self.surname = u"Сычов"
    self.nickname = u"Сычуля"
    self.alignment = Alignment()
    self.features = []  # holds Feature() objects and their children. Add a new Feature only with self.add_feature()
    self.tokens = []  # Special resources to activate various events
    self.relations_tendency = {'convention': 0, 'conquest': 0, 'contribution': 0}
    # obedience, dependency and respect stats
    self._stance = []
    self.avatar_path = ''
    self.master = None  # If this person is a slave, the master will be set
    self.supervisor = None
    self.slaves = []
    self.subordinates = []
    self.ap = 1
    self.schedule = Schedule(self)
    self.modifiers = Modifiers()
    # init starting features
    self.availabe_actions = []  # used if we are playing slave-part (attribute name is misspelled; kept as-is)
    self.allowance = 0  # Sparks spent each turn on a lifestyle
    self.ration = {
        "amount": 'unlimited',  # 'unlimited', 'limited' by price, 'regime' for figure, 'starvation' no food
        "food_type": "cousine",  # 'forage', 'sperm', 'dry', 'canned', 'cousine'
        "target": 0,  # figures range -2:2
        "limit": 0,  # maximum resources spent to feed the character each turn
        "overfeed": 0,
    }
    self.accommodation = 'makeshift'
    self.skills = []
    self.specialized_skill = None
    self.focused_skill = None
    self.skills_used = []
    self.factors = []
    self.restrictions = []
    # init_needs receives the instance as built so far.
    self._needs = init_needs(self)
    # Every attribute starts at 3.
    self.attributes = {
        'physique': 3,
        'mind': 3,
        'spirit': 3,
        'agility': 3,
        'sensitivity':3
    }
    self.university = {'name': 'study', 'effort': 'bad', 'auto': False}
    self.mood = 0
    self.fatigue = 0
    self._vitality = 0
    self.appetite = 0
    self.calorie_storage = 0
    self.money = 0
    self._determination = 0
    self._anxiety = 0
    self.rewards = []
    self.used_rewards = []
    self.merit = 0  # player only var for storing work result
    # Other persons known and relations with them, value[1] = [needed points, current points]
    self._relations = []
    self.selfesteem = 0
    self.conditions = []
    self.genus = init_genus(self, genus)
    # Age and gender are recorded as features; then the avatar is set.
    self.add_feature(age)
    self.add_feature(gender)
    self.set_avatar()
    # Register the new person in the shared persons_list.
    persons_list.append(self)
from alignment import align_sequences
from alignment import Alignment
from alignment.utils import merge

# Small demo: align two example strings and print the resulting alignment.
sequence_a = 'voldemort'
sequence_b = 'waldemort'

# align the two sequences; align_sequences returns the two aligned sequences
# and a third value that is bound to `distance` here
align_a, align_b, distance = align_sequences(sequence_a, sequence_b)

# construct a new Alignment object
alignment = Alignment.from_sequences(align_a, align_b)

# pretty print the alignment; the parenthesised single-argument form gives
# the same output under the Python 2 print statement and also parses on
# Python 3
print(alignment)
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Merge coreferent AMR nodes and tidy up the resulting graph.

    Steps performed on ``inAMR``, in order:
      1. normalise the role names 'mod-NN' and 'MOD' to 'mod';
      2. use the '-COREF' triples to map each variable to the variable it is
         replaced by, and rewrite all other triples through that mapping;
      3. strip decoration suffixes from the concept names;
      4. splice out 'CARDINAL' nodes that have exactly two triples;
      5. rename variables after the first letter of their concept;
      6. rebuild the triple-to-token alignment for the renamed triples;
      7. give every orphan variable (and only those) a '-DUMMY' triple.

    ``sentenceId`` is only used in an assertion message and ``ww`` only to
    build the alignment dictionaries.  ``jsonFile``, ``tokens``, ``depParse``
    and ``completed`` are not referenced in this part of the function, and
    ``wTags`` appears only inside the disabled assertion.  No ``return``
    statement is present in this part either.

    NOTE(review): ``alignment`` and the Alignment objects built here are
    queried as ``obj[i:]``; presumably that yields the token linked to source
    index ``i`` or None -- confirm against the Alignment class.
    NOTE(review): ``basestring`` and the tuple parameters of swap_callback
    are Python 2 only.
    """
    amr = inAMR

    # clean up role names: :mod-nn and :MOD => :mod
    repltriples = [(x, r, (y,)) for x,r,(y,) in amr.triples(instances=False) if r in ['mod-NN','MOD']]
    newtriples = [(x, 'mod', (y,)) for x,r,(y,) in repltriples]
    amr = new_amr_from_old(amr, new_triples=newtriples, avoid_triples=repltriples)

    # for each triple of the form <x :-COREF y>, delete the triple and replace
    # all occurrences of y with x
    triples = amr.triples(instances=False)

    # Use -COREF flags to establish a mapping from current to new variables
    coref_triples = [trip for trip in triples if trip[1]=='-COREF']
    replacements = {}
    for coref_trip in coref_triples:
        x, _, (y,) = coref_trip
        # TODO: strengthen the choice of main concepts for the cluster
        # (the string below is a disabled assertion kept for reference)
        '''
        assert amr.get_concept(x).replace('-ROOT','')==amr.get_concept(y).replace('-ROOT','') \
            or (alignment[int(y):] is not None and wTags[alignment[int(y):]]["PartOfSpeech"] in ['PRP','PRP$']) \
            or amr.get_concept(y).endswith('-FALLBACK'), (y,ww[alignment[int(y):]],x,ww[alignment[int(x):]])
        '''
        if x in replacements and replacements[x]==y:
            # avoid 2-node cycle
            continue
        replacements[y] = x

    # Avoid a chain of replacements, e.g. a -> b and b -> c
    # Assume there are no cycles, otherwise this will loop infinitely
    while set(replacements.keys()) & set(replacements.values()):
        for k in replacements.keys():
            if replacements[k] in replacements:
                assert replacements[k]!=k,('Self-coreferent?',k,'in',sentenceId,replacements)
                # shorten the chain by one hop, then re-test the while condition
                replacements[k] = replacements[replacements[k]]
                break

    # MERGE the coreferent nodes
    all_triples = []
    trip2tokAlignment = Alignment('many2one') # source side indexes 'all_triples'
    newtriples = []
    # NOTE: this is an alias, not a copy -- the appends below also extend
    # coref_triples.
    oldtriples = coref_triples
    for a, r, (b,) in triples:
        if r=='-COREF':
            continue
        # remember the triple as it was before any replacement
        trip = (a,r,(b,))
        change = False
        if a in replacements:
            a = replacements[a]
            change = True
        if b in replacements:
            b = replacements[b]
            change = True
        if change:
            newtriples.append((a,r,b))
            oldtriples.append(trip)
        # link the triple's position in all_triples to the token aligned
        # with its (possibly replaced) target node
        if isinstance(b,basestring) and b in amr.node_to_concepts and alignment[int(b):] is not None:
            trip2tokAlignment.link(len(all_triples), alignment[int(b):])
        all_triples.append((a,r,b))

    amr = new_amr_from_old(amr, new_triples=newtriples, avoid_triples=oldtriples, avoid_concepts=replacements)

    # delete various decorations
    for k,v in amr.node_to_concepts.items():
        amr.node_to_concepts[k] = v.replace('-FALLBACK_PRON','').replace('-FALLBACK','').replace('-DATE_RELATIVE','').replace('-DATE','').replace('-TIME','')

    if config.verbose:
        print('Triple-to-token alignment:',{trip:ww[trip2tokAlignment[t:]]+'-'+str(trip2tokAlignment[t:]) for t,trip in enumerate(all_triples) if trip2tokAlignment[t:] is not None}, file=sys.stderr)

    # delete CARDINAL concepts (cf. the nes module) unless the concept has no parent
    # e.g. in wsj_0077.14, "154.2 million shares" is converted from
    # (s / shares :quant (c / CARDINAL :quant 154200000)) to (s / shares :quant 154200000)
    cardinals = {v for v,c in amr.node_to_concepts.items() if c=='CARDINAL'}
    for v in cardinals:
        old2newvars = {}
        # all triples that touch v, with the target unwrapped from its 1-tuple
        triples = [(x,r,y) for x,r,(y,) in amr.triples(instances=False) if x==v or y==v]
        try:
            assert 1<=len(triples)<=2,(triples,amr)
        except AssertionError:
            # something complicated; just punt
            continue
        if len(triples)<2:
            continue
        t1, t2 = triples
        # order them so that t1 points at v and t2 leaves v
        if t1[2]!=v:
            t1, t2 = t2, t1
        assert t1[2]==t2[0]==v
        old2newvars[v] = t2[2]
        del amr.node_to_concepts[v]
        newtrip = (t1[0],t1[1],t2[2])
        assert newtrip[0]!=newtrip[2]
        # replace t1 and t2 with newtrip
        amr = new_amr_from_old(amr, new_triples=[newtrip], avoid_triples=[t1,t2])
        if config.verbose:
            print('merge CARDINAL:',[t1,t2],'->',newtrip, file=sys.stderr)
        # keep all_triples in step: newtrip takes over t1's position
        t = all_triples.index(t1)
        #assert trip2tokAlignment[t:] is not None
        all_triples[t] = newtrip
        #assert trip2tokAlignment[all_triples.index(t2):] is None
    #amr = new_amr([(old2newvars.get(x,x), r, (old2newvars.get(y,y),)) for x,r,(y,) in amr.triples(instances=False) if x!=v], amr.node_to_concepts)

    # choose user-friendly variable names
    # assumes current variable names are all integer strings
    old2newvars = {}
    newconcepts = {}
    for v,c in amr.node_to_concepts.items():
        # first letter of the concept, or the old name if it is not a letter
        v2 = c[0].lower() if c[0].isalpha() else v
        if v2 in newconcepts:
            # append numerical suffix if necessary to disambiguate
            assert v2.isalpha()
            v2 += str(sum(1 for k in newconcepts.keys() if k[0]==v2))
        newconcepts[v2] = c
        old2newvars[v] = v2

    # rebuild the triple list and its token alignment under the new names
    all_triples2 = []
    trip2tokAlignment2 = Alignment('many2one')
    for x,r,(y,) in amr.triples(instances=False):
        t = all_triples.index((x,r,y))
        if trip2tokAlignment[t:] is not None:
            trip2tokAlignment2.link(len(all_triples2), trip2tokAlignment[t:])
        all_triples2.append((old2newvars.get(x,x), r, (old2newvars.get(y,y),)))

    # maps each aligned triple to the string "<word>-<token index>"
    finalAlignment = {trip:ww[trip2tokAlignment2[t:]]+'-'+str(trip2tokAlignment2[t:]) for t,trip in enumerate(all_triples2) if trip2tokAlignment2[t:] is not None}
    if config.verbose:
        print('Final triple-to-token alignment:',finalAlignment, file=sys.stderr)

    amr = new_amr(all_triples2, newconcepts)

    # detect orphans (variables with no triples)
    orphans = {v: True for v in newconcepts}
    for x,r,(y,) in amr.triples(instances=False):
        if r=='-DUMMY':
            continue
        orphans[x] = False
        if y in orphans:
            orphans[y] = False
    orphans = [v for v in orphans if orphans[v]]
    if config.verbose:
        print(len(orphans),'orphans',orphans, file=sys.stderr)

    # ensure a node has a :-DUMMY annotation iff it is an orphan
    amr = new_amr([(x,r,(y,)) for x,r,(y,) in amr.triples(instances=False) if r!='-DUMMY']+[(o,'-DUMMY','') for o in orphans], newconcepts)

    # Placeholder callback taking two triples; it currently does nothing.
    def swap_callback((x,r,(y,)),(x2,r2,(y2,))):
        #TODO: fix alignments
        pass
import cv2
import sys
import numpy as np
import datetime
from alignment import Alignment
sys.path.append('../SSH')
from ssh_detector import SSHDetector

# Demo script: load an image (first command-line argument, or a bundled
# sample) and work out the factor by which it should be rescaled.

#short_max = 800
# scales[0] is the target for the shorter image side, scales[1] the cap for
# the longer side (see target_size / max_size below).
scales = [1200, 1600]
t = 2
# NOTE(review): the meaning of the second constructor arguments (0 and 12)
# is not visible here -- confirm against SSHDetector / Alignment.
detector = SSHDetector('../SSH/model/e2ef', 0)
alignment = Alignment('./model/3d_I5', 12)
out_filename = './out.png'
f = '../sample-images/t1.jpg'
if len(sys.argv)>1:
    f = sys.argv[1]
img = cv2.imread(f)
if img is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # instead of raising; stop with a clear message rather than failing on
    # `img.shape` below.
    sys.exit('Could not read image: %s' % f)
im_shape = img.shape
print(im_shape)
target_size = scales[0]
max_size = scales[1]
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
if im_size_min>target_size or im_size_max>max_size:
    # scale so that the shorter side reaches target_size
    im_scale = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
def compute_observation_probabilities(self):
    """Compute the observation-constraint probabilities of all field pairs.

    For every candidate request field and every candidate response field the
    aligned messages are clustered by that field, and the message-similarity,
    structure, dimension and value constraints are evaluated on the clusters.
    A RemoteCoupling object then provides the remote-coupling constraint for
    the pair.

    Returns a tuple ``(pairs_p, pairs_size)``.  Each is a two-element list
    ``[request_dict, response_dict]`` keyed by ``"<fid_request>-<fid_response>"``;
    the probability dicts hold ``[p_m, p_r, p_s, p_d, p_v]`` and the size
    dicts hold the number of messages in each cluster.
    """
    print("[++++++++] Compute probabilities of observation constraints")

    # Aligned messages, then raw and aligned messages split by direction.
    oneline_path = os.path.join(self.output_dir,
                                Alignment.FILENAME_OUTPUT_ONELINE)
    messages_aligned = Alignment.get_messages_aligned(self.messages,
                                                      oneline_path)
    _raw_request, _raw_response = Processing.divide_msgs_by_directionlist(
        self.messages, self.direction_list)
    aligned_split = Processing.divide_msgs_by_directionlist(
        messages_aligned, self.direction_list)
    messages_request_aligned, messages_response_aligned = aligned_split

    # Candidate field ids for each direction.
    fid_list_request = self.filter_fields(
        self.fields, self.fid_list, messages_request_aligned)
    fid_list_response = self.filter_fields(
        self.fields, self.fid_list, messages_response_aligned)
    logging.debug(
        "request candidate fid: {}\nresponse candidate fid: {}".format(
            fid_list_request, fid_list_response))

    # compute matrix of similarity scores
    constraint_m_request = MessageSimilarity(
        messages=messages_request_aligned)
    constraint_m_response = MessageSimilarity(
        messages=messages_response_aligned)
    constraint_m_request.compute_similarity_matrix()
    constraint_m_response.compute_similarity_matrix()

    # {fid: [pm, ps, pd, pv]} and {fid: [cluster sizes]} per direction
    cluster_p_request, cluster_p_response = {}, {}
    cluster_size_request, cluster_size_response = {}, {}
    # {"fid-fid": [pm, pr, ps, pd, pv]} and {"fid-fid": [cluster sizes]}
    pairs_p_request, pairs_p_response = {}, {}
    pairs_size_request, pairs_size_response = {}, {}

    for fid_request in fid_list_request:
        logging.info("[++++] Test Request Field {0}-*".format(fid_request))

        # merge other fields
        fields_merged_request = self.merge_nontest_fields(
            self.fields, fid_request)
        if fid_request == 0:
            fid_merged_request = 0
        else:
            fid_merged_request = 1

        # generate clusters, then change symbol names
        symbols_request_aligned = self.change_symbol_name(
            self.cluster_by_field(fields_merged_request,
                                  messages_request_aligned,
                                  fid_merged_request))

        # compute prob of m,s,d,v
        cluster_p_request[fid_request] = [
            constraint_m_request.compute_constraint_message_similarity(
                symbols_request_aligned),
            self.compute_constraint_structure(symbols_request_aligned),
            self.compute_constraint_dimension(symbols_request_aligned),
            self.compute_constraint_value(symbols_request_aligned),
        ]
        cluster_size_request[fid_request] = [
            len(symbol.messages)
            for symbol in symbols_request_aligned.values()
        ]

        for fid_response in fid_list_response:
            #if fid_request != fid_response:
            #    continue
            logging.debug("[++] Test Response Field {0}-{1}".format(
                fid_request, fid_response))

            # merge other fields
            fields_merged_response = self.merge_nontest_fields(
                self.fields, fid_response)
            if fid_response == 0:
                fid_merged_response = 0
            else:
                fid_merged_response = 1

            # generate clusters, then change symbol names
            symbols_response_aligned = self.change_symbol_name(
                self.cluster_by_field(fields_merged_response,
                                      messages_response_aligned,
                                      fid_merged_response))

            # compute prob of m,s,d,v (once per response field)
            if fid_response not in cluster_p_response:
                cluster_p_response[fid_response] = [
                    constraint_m_response.compute_constraint_message_similarity(
                        symbols_response_aligned),
                    self.compute_constraint_structure(
                        symbols_response_aligned),
                    self.compute_constraint_dimension(
                        symbols_response_aligned),
                    self.compute_constraint_value(
                        symbols_response_aligned),
                ]
                cluster_size_response[fid_response] = [
                    len(symbol.messages)
                    for symbol in symbols_response_aligned.values()
                ]

            # print msg numbers of each cluster
            logging.debug("Number of request symbols: {0}".format(
                len(symbols_request_aligned.values())))
            for symbol in symbols_request_aligned.values():
                logging.debug("  Symbol {0} msgs numbers: {1}".format(
                    str(symbol.name), len(symbol.messages)))
            logging.debug("Number of response symbols: {0}".format(
                len(symbols_response_aligned.values())))
            for symbol in symbols_response_aligned.values():
                logging.debug("  Symbol {0} msgs numbers: {1}".format(
                    str(symbol.name), len(symbol.messages)))

            # compute remote coupling probabilities
            rc = RemoteCoupling(messages_all=messages_aligned,
                                symbols_request=symbols_request_aligned,
                                symbols_response=symbols_response_aligned,
                                direction_list=self.direction_list)
            rc.compute_pairs_by_directionlist()

            fid_pair = "{}-{}".format(fid_request, fid_response)
            p_r_request = rc.compute_constraint_remote_coupling(
                RemoteCoupling.TEST_TYPE_REQUEST)
            p_r_response = rc.compute_constraint_remote_coupling(
                RemoteCoupling.TEST_TYPE_RESPONSE)

            logging.debug(
                "[+] Observation Prob Results for pairs {}".format(
                    fid_pair))

            # Request side: the four cluster values plus remote coupling.
            p_m, p_s, p_d, p_v = cluster_p_request[fid_request]
            logging.debug(
                "Request:\nPm: {0}\nPr: {1}\nPs: {2}\nPd: {3}\nPv: {4}".
                format(p_m, p_r_request, p_s, p_d, p_v))
            pairs_p_request[fid_pair] = [p_m, p_r_request, p_s, p_d, p_v]
            pairs_size_request[fid_pair] = cluster_size_request[fid_request]

            # Response side, same layout.
            p_m, p_s, p_d, p_v = cluster_p_response[fid_response]
            logging.debug(
                "Response:\nPm: {0}\nPr: {1}\nPs: {2}\nPd: {3}\nPv: {4}".
                format(p_m, p_r_response, p_s, p_d, p_v))
            pairs_p_response[fid_pair] = [p_m, p_r_response, p_s, p_d, p_v]
            pairs_size_response[fid_pair] = cluster_size_response[
                fid_response]

            # Drop the per-pair objects before the next iteration.
            del rc
            del symbols_response_aligned  #symbols
            del fields_merged_response
            gc.collect()

        del symbols_request_aligned
        del fields_merged_request
        gc.collect()

    return ([pairs_p_request, pairs_p_response],
            [pairs_size_request, pairs_size_response])
class Analysis(object):
    """Performs the analysis and collects the results.

    Most settings are read from the module-level ``the_config`` object.
    ``analyse`` calls ``self.do_analysis()``, which is not defined in this
    class body and must be supplied elsewhere (presumably by a subclass).
    """

    def __init__(self, cfg, force_restart, threads):
        """Prepare folders, database, alignment and starting tree.

        cfg           -- only ``cfg.alignment_path`` and
                         ``cfg.user_tree_topology_path`` are read from it;
                         everything else comes from ``the_config``
                         (presumably the same object -- confirm)
        force_restart -- when true, the whole output folder is deleted first
        threads       -- number of threads; -1 means "use the CPU count"
        """
        the_config.validate()

        # TODO: Remove -- put this all into "options"
        if threads == -1:
            threads = threadpool.get_cpu_count()
        self.threads = threads

        # TODO: Move these to the config validate and prepare
        log.info("Beginning Analysis")
        self.process_restart(force_restart)

        # Make some folders for the analysis
        the_config.make_output_folders()
        the_config.database = Database(the_config)

        # Check for old analyses to see if we can use the old data
        the_config.check_for_old_config()

        # TODO: This is going to be in "Prepare"
        self.make_alignment(cfg.alignment_path)
        self.make_tree(cfg.user_tree_topology_path)

        # We need this to block the threads for critical stuff
        self.lock = threading.Condition(threading.Lock())

        # Store the result in here
        self.results = results.AnalysisResults(the_config.model_selection)

    def process_restart(self, force_restart):
        """Remove previous output: everything, or schemes and phylofiles."""
        if force_restart:
            # Remove everything
            if os.path.exists(the_config.output_path):
                log.warning("Deleting all previous workings in '%s'",
                            the_config.output_path)
                shutil.rmtree(the_config.output_path)
        else:
            # Remove the schemes folder, and clean out the phylofiles folder
            for stale_path in (the_config.schemes_path,
                               the_config.phylofiles_path):
                if os.path.exists(stale_path):
                    log.debug("Removing files in '%s'", stale_path)
                    shutil.rmtree(stale_path)

    def analyse(self):
        """Run ``self.do_analysis()`` and return the collected results.

        The database is closed even when the analysis raises.
        """
        try:
            self.do_analysis()
        finally:
            # TODO: Not really the right place for it?
            the_config.database.close()
        return self.results

    def make_alignment(self, source_alignment_path):
        """Read the source alignment and store a copy in the start-tree folder.

        If a copy from a previous run exists, both its content and its set of
        species names must match the alignment just read; otherwise
        AnalysisError is raised.
        """
        # Make the alignment
        self.alignment = Alignment()
        self.alignment.read(source_alignment_path)

        # TODO REMOVE -- this should be part of the checking procedure
        # We start by copying the alignment
        self.alignment_path = os.path.join(the_config.start_tree_path,
                                           'source.phy')
        if not os.path.exists(self.alignment_path):
            self.alignment.write(self.alignment_path)
            return

        # Make sure it is the same
        old_align = Alignment()
        old_align.read(self.alignment_path)
        if not old_align.same_as(self.alignment):
            log.error("Alignment file has changed since previous run. "
                      "You need to use the force-restart option.")
            raise AnalysisError

        # Compare species names as multisets, i.e. ignoring their order.
        old_species = collections.Counter(old_align.species)
        new_species = collections.Counter(self.alignment.species)
        if old_species != new_species:
            log.error("Species names in alignment have changed since "
                      "previous run. You need to use the force-restart "
                      "option.")
            raise AnalysisError

    def need_new_tree(self, tree_path):
        """Return True when no complete starting tree exists at tree_path.

        A tree file counts as complete when it contains a ';' character.
        """
        if not os.path.exists(tree_path):
            log.info("Starting tree will be estimated from the data.")
            return True

        # Read through a context manager so the handle is always closed.
        with open(tree_path) as tree_file:
            complete = ';' in tree_file.read()

        if complete:
            log.info("Starting tree file found.")
            return False

        log.info("Starting tree file found but it is incomplete. "
                 "Re-estimating")
        return True

    def make_tree(self, user_path):
        """Make sure a starting tree exists and store it in self.tree_path.

        When no complete tree file is found, the topology comes from, in
        order of preference: the user-supplied file ``user_path``; the
        processor's ``make_topology`` when ``the_config.no_ml_tree`` is true;
        otherwise an ML topology from RAxML.  Branch lengths are estimated
        afterwards for the first two cases only.
        """
        # Begin by making a filtered alignment, containing ONLY those columns
        # that are defined in the subsets
        subset_with_everything = subset_ops.merge_subsets(
            the_config.user_subsets)
        self.filtered_alignment = SubsetAlignment(
            self.alignment, subset_with_everything)
        self.filtered_alignment_path = os.path.join(
            the_config.start_tree_path, 'filtered_source.phy')
        self.filtered_alignment.write(self.filtered_alignment_path)

        # Check the full subset against the alignment
        subset_ops.check_against_alignment(subset_with_everything,
                                           self.alignment, the_config)

        # Same value make_alignment() stores; kept so this method does not
        # depend on make_alignment() having been called first.
        self.alignment_path = os.path.join(
            the_config.start_tree_path, 'source.phy')

        # Now check for the tree
        tree_path = the_config.processor.make_tree_path(
            self.filtered_alignment_path)

        if self.need_new_tree(tree_path):
            log.debug("Estimating new starting tree, no old tree found")

            # If we have a user tree, then use that, otherwise, create a
            # topology
            util.clean_out_folder(the_config.start_tree_path,
                                  keep=["filtered_source.phy", "source.phy"])

            if user_path is not None and user_path != "":
                # Copy it into the start tree folder
                log.info("Using user supplied topology at %s", user_path)
                topology_path = os.path.join(the_config.start_tree_path,
                                             'user_topology.phy')
                util.dupfile(user_path, topology_path)
                need_bl = True
            elif the_config.no_ml_tree:
                log.debug("didn't find tree at %s, making a new one",
                          tree_path)
                topology_path = the_config.processor.make_topology(
                    self.filtered_alignment_path, the_config.datatype,
                    the_config.cmdline_extras)
                need_bl = True
            else:
                # A plain `else` guarantees need_bl and topology_path are
                # always bound, whatever value no_ml_tree holds.
                log.debug(
                    "didn't find tree at %s, making an ML tree with RAxML",
                    tree_path)
                tree_scheme = scheme.create_scheme(
                    the_config, "tree_scheme",
                    range(len(the_config.user_subsets)))
                topology_path = raxml.make_ml_topology(
                    self.filtered_alignment_path, the_config.datatype,
                    the_config.cmdline_extras, tree_scheme, self.threads)

                # here we copy the ML tree topology so it can be used with
                # PhyML too
                # TODO: this is a hack, and it would be better to decide on a
                # universal name for the different types of tree we might
                # have.
                phyml_tree = os.path.join(
                    os.path.dirname(topology_path),
                    "filtered_source.phy_phyml_tree.txt")
                copyfile(topology_path, phyml_tree)
                need_bl = False

            if need_bl:
                # Now estimate branch lengths
                tree_path = the_config.processor.make_branch_lengths(
                    self.filtered_alignment_path,
                    topology_path,
                    the_config.datatype,
                    the_config.cmdline_extras)

        self.tree_path = tree_path
        log.debug("Starting tree with branch lengths is here: %s",
                  self.tree_path)

    def run_task(self, model_name, sub):
        """Analyse ``model_name`` on subset ``sub`` and record the result.

        An ExternalProgramError is re-raised unless
        ``the_config.suppress_errors`` is set; in that case a fabricated
        model result is stored for the subset instead.
        """
        # This bit should run in parallel (forking the processor)
        try:
            the_config.processor.analyse(
                model_name,
                sub.alignment_path,
                self.tree_path,
                the_config.branchlengths,
                the_config.cmdline_extras
            )
            fabricate = False
        except ExternalProgramError:
            if not the_config.suppress_errors:
                # In the Kmeans algorithm we suppress errors and "fabricate"
                # subsets (we assume the error is because the subset is too
                # small for analysis)
                raise

            # If it is kmeans we assume that the error is because the subset
            # is too small or unanalysable, so we fabricate it
            log.debug("New subset could not be analysed. It will be merged "
                      "at the end of the analysis")
            fabricate = True

        # Not entirely sure that WE NEED to block here, but it is safer to do
        # It shouldn't hold things up toooo long...
        with self.lock:
            if fabricate:
                sub.fabricate_model_result(the_config, model_name)
            else:
                sub.parse_model_result(the_config, model_name)

            # Try finalising, then the result will get written out earlier...
            sub.finalise(the_config)

    def add_tasks_for_sub(self, tasks, sub):
        """Append one ``(run_task, (model, sub))`` entry per model to process."""
        for m in sub.models_to_process:
            tasks.append((self.run_task, (m, sub)))

    def run_concurrent(self, tasks):
        """Run the tasks one after another in the calling thread."""
        for func, args in tasks:
            log.debug("About to analyse subset %s", args[1].name)
            func(*args)

    def run_threaded(self, tasks):
        """Run the tasks on a thread pool and wait for all of them."""
        if not tasks:
            return
        pool = threadpool.Pool(tasks, self.threads)
        pool.join()

    def analyse_list_of_subsets(self, all_subsets):
        """Analyse all subsets in the list, largest first, in chunks.

        ``all_subsets`` is sorted in place.  Raises AnalysisError if any
        subset still fails to finalise after its tasks have run.
        """
        # analyse bigger subsets first, for efficiency; negating the length
        # keeps the order of the former 1/len key without dividing, so an
        # empty subset no longer raises ZeroDivisionError here
        all_subsets.sort(key=lambda sub: -len(sub.columns))

        # chunk the list into blocks of ~1000 tasks
        # in empirical testing, this speeds things up lot
        # though we are not entirely sure why...
        n = max(1, 1000 // len(the_config.models))
        log.debug("chunk size (in number of subsets) = %d", n)

        for start in range(0, len(all_subsets), n):
            # prepare the list of tasks
            tasks = []
            for sub in all_subsets[start:start + n]:
                if sub.is_done:
                    continue
                if not sub.is_prepared:
                    sub.prepare(the_config, self.alignment)
                self.add_tasks_for_sub(tasks, sub)

            if tasks:
                # Now do the analysis
                if self.threads == 1:
                    self.run_concurrent(tasks)
                else:
                    self.run_threaded(tasks)

        # Now see if we're done
        for sub in all_subsets:
            # ALL subsets should already be finalised in the task. We just
            # check again here
            if not sub.finalise(the_config):
                log.error("Failed to run models %s; not sure why",
                          ", ".join(list(sub.models_not_done)))
                raise AnalysisError

    def analyse_scheme(self, sch):
        """Analyse the unfinished subsets of ``sch`` and record its result.

        Returns the SchemeResult that was added to ``self.results``.
        """
        # Progress
        the_config.progress.next_scheme()

        # analyse the subsets in the scheme that aren't done
        # NB for most schemes we will have all subsets done, so this saves
        # time
        not_done = [sub for sub in sch if not sub.is_done]
        if not_done:
            self.analyse_list_of_subsets(not_done)

        # AIC needs the number of sequences
        number_of_seq = len(self.alignment.species)
        result = scheme.SchemeResult(sch, number_of_seq,
                                     the_config.branchlengths,
                                     the_config.model_selection)
        self.results.add_scheme_result(sch, result)
        return result