Esempio n. 1
0
def realign_file(args, model, output_filename, alignment_filename):
    """Realign every alignment in *alignment_filename* and save the results.

    Annotations are computed once for the whole input file.  Each alignment
    loaded from the file is then masked with ``args.mask_repeats``, realigned
    by a fresh ``args.algorithm()`` instance over the full length of its first
    two sequences, unmasked again and appended to *output_filename*.

    Args:
        args: Parsed options.  The attributes read here are ``expand_model``,
            ``tracks`` (a set), ``annotation_model``, ``alignment_regexp``,
            ``sequence_regexp``, ``draw``, ``mask_repeats`` and ``algorithm``.
        model: Model used for realignment; also used for computing the
            annotations unless ``args.annotation_model`` is set.
        output_filename: File the realigned alignments are written to.
        alignment_filename: FASTA file with the input alignments.

    Returns:
        1 if an alignment with fewer than two sequences is encountered
        (processing stops there); otherwise None.
    """
    # HACK: expanding the model needs the 'trf_cons' annotations, so that
    # track is switched on only for the annotation pass.  Remember whether we
    # were the ones who added it; keeping a second reference to the same set
    # would not preserve its previous contents.
    track_added = bool(args.expand_model) and 'trf_cons' not in args.tracks
    if track_added:
        args.tracks.add('trf_cons')
    annotation_model = args.annotation_model if args.annotation_model else model
    consensuses = None
    try:
        annotations = compute_annotations(args, alignment_filename,
                                          annotation_model)
        if args.expand_model:
            consensuses = annotations['trf_cons']
    finally:
        # Restore the caller's track selection even if the lookup above fails.
        if track_added:
            args.tracks.discard('trf_cons')
    # end of HACK

    want_image = len(args.draw) > 0
    with Open(output_filename, 'w') as output_file_object:
        alignments = Fasta.load(
            alignment_filename,
            args.alignment_regexp,
            Alignment,
            sequence_selectors=args.sequence_regexp)
        for aln in alignments:
            if len(aln.sequences) < 2:
                sys.stderr.write("ERROR: not enough sequences in file\n")
                return 1

            if want_image:
                drawer = AlignmentCanvas()
                drawer.add_original_alignment(aln)
            else:
                # No image requested: use the brainwash()-wrapped canvas.
                drawer = brainwash(AlignmentCanvas)()

            aln, unmask_repeats = args.mask_repeats(aln, annotations)
            seq1, seq2 = tuple(map(Fasta.alnToSeq, aln.sequences[:2]))
            perf.msg("Data loaded in {time} seconds.")
            perf.replace()

            if args.expand_model:
                # The expanded model is built from the consensuses found in
                # either of the two sequences.
                first_cons = consensuses[aln.names[0]]
                second_cons = consensuses[aln.names[1]]
                real_model = model.expandModel(
                    {'consensus': list(first_cons.union(second_cons))})
            else:
                real_model = model

            realigner = args.algorithm()
            realigner.setDrawer(drawer)
            realigner.prepareData(seq1, aln.names[0], seq2, aln.names[1], aln,
                                  real_model, annotations, args)

            aln = realigner.realign(0, len(seq1), 0, len(seq2))
            aln = unmask_repeats(aln)
            perf.msg("Sequence was realigned in {time} seconds.")
            perf.replace()

            if want_image:
                drawer.add_sequence('X', seq1)
                drawer.add_sequence('Y', seq2)
                # NOTE(review): rows 0 and 2 of the realigned alignment are
                # drawn; presumably row 1 is an annotation row -- confirm.
                drawer.add_alignment_line(
                    101, (255, 0, 255, 255), 2,
                    AlignmentPositionGenerator(Alignment([aln[0], aln[2]])))
                drawer.draw(args.draw, 2000, 2000)
                perf.msg("Image was drawn in {time} seconds.")

            # Save output_file
            Fasta.saveAlignmentPiece(aln, output_file_object)
Esempio n. 2
0
def _last_available_trf_driver(args):
    """Return a TRFDriver for the last existing path in ``args.trf``, or None."""
    driver = None
    # No early exit: as in the original loop, the last existing path wins.
    for trf_executable in args.trf or ():
        if os.path.exists(trf_executable):
            driver = TRFDriver(trf_executable, mathType=args.mathType)
    return driver


def compute_annotations(args, alignment_filename, model):
    """Compute the annotation tracks selected in ``args.tracks``.

    Supported tracks:

    * ``'trf'`` -- output of ``TRFDriver.run`` on the alignment file.
    * ``'original_repeats'`` -- repeats loaded from the JSON file
      ``alignment_filename + '.repeats'``, converted to ``Repeat`` objects.
    * ``'trf_cons'`` -- for every sequence name reported by TRF, the set of
      consensuses of its repeats.
    * ``'hmm'`` -- output of ``HMMDriver.run`` on the alignment file.

    The TRF-based tracks are silently left out when none of the paths in
    ``args.trf`` exists.

    Args:
        args: Parsed options; ``tracks``, ``trf`` and ``mathType`` are read.
        alignment_filename: Path of the alignment file handed to the drivers.
        model: Model passed to ``HMMDriver``.

    Returns:
        dict mapping each computed track name to its annotation data.
    """
    annotations = dict()
    tracks = args.tracks
    wants_trf = 'trf' in tracks
    wants_trf_cons = 'trf_cons' in tracks

    if wants_trf or wants_trf_cons:
        trf = _last_available_trf_driver(args)
        if trf:
            # Both tracks are derived from the same TRF output, so the
            # external tool is run only once.
            trf_repeats = trf.run(alignment_filename)
            if wants_trf:
                annotations['trf'] = trf_repeats
            if wants_trf_cons:
                consensuses = {}
                for seq_name in trf_repeats:
                    consensuses[seq_name] = {
                        repeat.consensus for repeat in trf_repeats[seq_name]}
                annotations['trf_cons'] = consensuses

    if 'original_repeats' in tracks:
        # Close the file deterministically instead of leaking the handle.
        with Open(alignment_filename + '.repeats', 'r') as repeats_file:
            raw_repeats = json.load(repeats_file)
        annotations['original_repeats'] = {
            seq_name: [Repeat(_v[0], _v[1], _v[2], _v[3], _v[4])
                       for _v in records]
            for seq_name, records in raw_repeats.items()
        }

    if 'hmm' in tracks:
        # An empty or missing path list is handed to the driver as None.
        paths = args.trf if args.trf else None
        driver = HMMDriver(paths, args.mathType, model)
        if driver:
            annotations['hmm'] = driver.run(alignment_filename)

    perf.msg("Hints computed in {time} seconds.")
    perf.replace()
    return annotations
Esempio n. 3
0
    def getTable(self, X, x, dx, Y, y, dy, tables,
                 forwardTable=None, backwardTable=None,
                 positionGenerator=None):
        """Feed the forward rows to every table processor in *tables*.

        The forward and backward tables are taken from the arguments when
        given; otherwise they come from ``self.getForwardTableGenerator`` and
        ``self.getBackwardTable``.  In both cases they pass through
        ``jcpoint`` under the names 'forward_table' and 'backward_table'.
        The backward table is flattened into a list indexed by row.  The
        forward table is consumed row by row and never stored whole.

        Args:
            X, x, dx, Y, y, dy: Forwarded unchanged to the table generators
                and to each processor's ``processRow``.
            tables: Callables invoked as ``table(dx, self)``; each result
                must provide ``processRow`` and ``getData``.
            forwardTable: Optional iterable of ``(row_index, row)`` pairs.
            backwardTable: Optional iterable of ``(row_index, row)`` pairs.
            positionGenerator: Optional iterable of positions whose first
                element is a row index; it is materialised into a list.

        Returns:
            list with the ``getData()`` result of every processor, in the
            order of *tables*.
        """
        # Original design notes (translated):
        # First I want a version that assumes both tables are large.
        # Then I want one that computes the missing parts itself.
        # Then add a switch that lets me do optimizations.
        # But then it will be necessary to know whether to go from the front
        # or from the back.
        if positionGenerator is not None:
            positionGenerator = list(positionGenerator)
        # Keep the caller-supplied tables under separate names so the lambdas
        # below do not close over the names that are being reassigned.
        given_forward = forwardTable
        given_backward = backwardTable
        perf.push()
        # Fetch tables if they are not provided
        forwardTable = jcpoint(
            lambda:
                given_forward
                if given_forward is not None else
                self.getForwardTableGenerator(X, x, dx, Y, y, dy,
                    positionGenerator=positionGenerator),
            'forward_table',
            self.io_files,
            self.mathType,
        )
        perf.msg('Forward table was computed in {time} seconds.')
        perf.replace()
        backwardTable = jcpoint(
            lambda:
                given_backward
                if given_backward is not None else
                self.getBackwardTable(X, x, dx, Y, y, dy,
                    positionGenerator=positionGenerator),
            'backward_table',
            self.io_files,
            self.mathType,
        )
        perf.msg('Backward table was computed in {time} seconds.')
        perf.replace()
        # Sort tables by first element (just in case).  sorted() returns a
        # new list, so the result has to be kept; discarding it would also
        # exhaust a one-shot iterator before the flattening loop below.
        backwardTable = sorted(backwardTable, key=lambda entry: entry[0])
        perf.msg('Tables were sorted in {time} seconds.')
        perf.replace()

        # Convert backward table into list
        bt = [dict() for _ in range(dx + 1)]
        for (i, B) in backwardTable:
            bt[i] = B
        perf.msg('Backward table was flattened in {time} seconds.')
        perf.replace()

        States = [table(dx, self) for table in tables]
        index = 0
        # NOTE(review): when positionGenerator is None the len() calls below
        # raise TypeError on the first forward row -- confirm that callers
        # always pass positions.
        for i, row in forwardTable:
            # Slice the positions that belong to row i: skip entries of
            # earlier rows, then take everything up to and including row i.
            # Presumably both the positions and the forward rows arrive in
            # increasing row order -- verify against the generators.
            while (index < len(positionGenerator) and
                   positionGenerator[index][0] < i):
                index += 1
            start = index
            while (index < len(positionGenerator) and
                   positionGenerator[index][0] <= i):
                index += 1

            for table in States:
                table.processRow(X, x, dx, Y, y, dy, i, row, bt,
                                 positionGenerator[start:index])
        ret = [table.getData() for table in States]

        perf.msg('Posterior table was computed in {time} seconds.')
        perf.pop()
        return ret
Esempio n. 4
0
    def getTable(self,
                 X,
                 x,
                 dx,
                 Y,
                 y,
                 dy,
                 tables,
                 forwardTable=None,
                 backwardTable=None,
                 positionGenerator=None):
        """Feed the forward rows to every table processor in *tables*.

        The forward and backward tables are taken from the arguments when
        given; otherwise they come from ``self.getForwardTableGenerator`` and
        ``self.getBackwardTable``.  In both cases they pass through
        ``jcpoint`` under the names 'forward_table' and 'backward_table'.
        The backward table is flattened into a list indexed by row.  The
        forward table is consumed row by row and never stored whole.

        Args:
            X, x, dx, Y, y, dy: Forwarded unchanged to the table generators
                and to each processor's ``processRow``.
            tables: Callables invoked as ``table(dx, self)``; each result
                must provide ``processRow`` and ``getData``.
            forwardTable: Optional iterable of ``(row_index, row)`` pairs.
            backwardTable: Optional iterable of ``(row_index, row)`` pairs.
            positionGenerator: Optional iterable of positions whose first
                element is a row index; it is materialised into a list.

        Returns:
            list with the ``getData()`` result of every processor, in the
            order of *tables*.
        """
        # Original design notes (translated):
        # First I want a version that assumes both tables are large.
        # Then I want one that computes the missing parts itself.
        # Then add a switch that lets me do optimizations.
        # But then it will be necessary to know whether to go from the front
        # or from the back.
        if positionGenerator is not None:
            positionGenerator = list(positionGenerator)
        # Keep the caller-supplied tables under separate names so the lambdas
        # below do not close over the names that are being reassigned.
        supplied_forward = forwardTable
        supplied_backward = backwardTable
        perf.push()
        # Fetch tables if they are not provided
        forwardTable = jcpoint(
            lambda: supplied_forward
            if supplied_forward is not None else self.getForwardTableGenerator(
                X, x, dx, Y, y, dy, positionGenerator=positionGenerator),
            'forward_table',
            self.io_files,
            self.mathType,
        )
        perf.msg('Forward table was computed in {time} seconds.')
        perf.replace()
        backwardTable = jcpoint(
            lambda: supplied_backward
            if supplied_backward is not None else self.getBackwardTable(
                X, x, dx, Y, y, dy, positionGenerator=positionGenerator),
            'backward_table',
            self.io_files,
            self.mathType,
        )
        perf.msg('Backward table was computed in {time} seconds.')
        perf.replace()
        # Sort tables by first element (just in case).  sorted() returns a
        # new list, so the result has to be kept; discarding it would also
        # exhaust a one-shot iterator before the flattening loop below.
        backwardTable = sorted(backwardTable, key=lambda entry: entry[0])
        perf.msg('Tables were sorted in {time} seconds.')
        perf.replace()

        # Convert backward table into list
        bt = [dict() for _ in range(dx + 1)]
        for (i, B) in backwardTable:
            bt[i] = B
        perf.msg('Backward table was flattened in {time} seconds.')
        perf.replace()

        States = [table(dx, self) for table in tables]
        index = 0
        # NOTE(review): when positionGenerator is None the len() calls below
        # raise TypeError on the first forward row -- confirm that callers
        # always pass positions.
        for i, row in forwardTable:
            # Slice the positions that belong to row i: skip entries of
            # earlier rows, then take everything up to and including row i.
            # Presumably both the positions and the forward rows arrive in
            # increasing row order -- verify against the generators.
            while (index < len(positionGenerator)
                   and positionGenerator[index][0] < i):
                index += 1
            start = index
            while (index < len(positionGenerator)
                   and positionGenerator[index][0] <= i):
                index += 1

            for table in States:
                table.processRow(X, x, dx, Y, y, dy, i, row, bt,
                                 positionGenerator[start:index])
        ret = [table.getData() for table in States]

        perf.msg('Posterior table was computed in {time} seconds.')
        perf.pop()
        return ret