Example #1
def user_defined_exons(tmp_sg, line):
    chr, strand = utils.get_chr(
        line[utils.TARGET]), line[utils.TARGET][0]  # get chr and strand
    upstream_exon = utils.get_pos(
        line[utils.UPSTREAM_EXON])  # get user-defined flanking exons
    downstream_exon = utils.get_pos(line[utils.DOWNSTREAM_EXON])
    first_primer, second_primer = utils.get_primer_coordinates(
        line[utils.PRIMER_COORD])

    # get possible exons for primer amplification
    tmp = sorted(tmp_sg.get_graph().nodes(), key=lambda x: (x[0], x[1]))
    first_ex = utils.find_first_exon(first_primer, tmp)
    last_ex = utils.find_last_exon(second_primer, tmp)
    my_exons = tmp[first_ex:last_ex + 1]
    # if tmp_sg.strand == '+':
    #     my_exons = tmp[tmp.index(upstream_exon):tmp.index(downstream_exon) + 1]
    # else:
    #     my_exons = tmp[tmp.index(downstream_exon):tmp.index(upstream_exon) + 1]

    # Use correct tx's and estimate counts/psi
    all_paths = algs.AllPaths(
        tmp_sg,
        my_exons,
        utils.get_pos(line[utils.TARGET]),  # tuple (start, end)
        chr=chr,
        strand=strand)
    # all_paths.trim_tx_paths()
    fexon = upstream_exon if strand == "+" else downstream_exon
    lexon = downstream_exon if strand == "+" else upstream_exon
    all_paths.trim_tx_paths_using_primers(first_primer, second_primer, fexon,
                                          lexon)
    all_paths.set_all_path_coordinates()
    paths, counts = all_paths.estimate_counts()  # run EM algorithm
    return paths, counts
Example #2
def main():

    t0 = get_times()
    #Capture the data
    if args.iterations is None:
        cap = cpd(nsamples=100000, divisor=args.divisor, volt_range=args.volt_range, nblocks=args.nblocks, dual_mode=True)
        print(cap.shape)
    else:
        cap=[]
        for i in range(args.iterations):
            cap.append(cpd(divisor=args.divisor, volt_range=args.volt_range, nblocks=args.nblocks, dual_mode=True))
            print('completed {} captures'.format(i+1))

    tf = get_times()

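    # NOTE: the reshape below assumes args.iterations was provided; the single-capture
    # branch above would need its own handling. After the transpose the assumed layout
    # is (channel, iteration, block, sample), with channel 0/1 saved as real/image below.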
    cap = np.array(cap).reshape(args.iterations, 2, -1, 100000)
    cap = cap.transpose([1,0,2,3]).copy()
    cap.shape = (2, -1, 100000)
    #Organize real and complex components
    #cap = organize_caps(cap, args)

    #test with a histogram
    #test_hist(cap)


    # Compute coordinates before and after data capture, store in dict
    coords_0 = get_pos(args, t0)
    coords_f = get_pos(args, tf)
    np.savez(args.path + '\\captures' + args.file_name, real=cap[0], image=cap[1], t0=t0, tf=tf, coords_0=coords_0, coords_f=coords_f)
Example #3
    def accept(self, shape_in):
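        # The two modulo checks below reject input shapes whose padded height/width
        # do not yield an integral number of window positions for the given kernel
        # size and stride.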
        if (shape_in[1] + 2 * self.pad[0] - self.hk + 1) % self.stride[0] != 0:
            return False

        if (shape_in[2] + 2 * self.pad[1] - self.wk + 1) % self.stride[1] != 0:
            return False

        self.shape_in = shape_in
        self.dim_in = np.prod(self.shape_in, dtype=int)
        self.din, self.hin, self.win = self.shape_in
        self.dk = self.din
        self.hpos = utils.get_pos(self.hin, self.hk, self.pad[0],
                                  self.stride[0])
        self.wpos = utils.get_pos(self.win, self.wk, self.pad[1],
                                  self.stride[1])
        self.hout = self.hpos.size
        self.wout = self.wpos.size
        self.shape = [self.dout, self.hout, self.wout]
        self.dim_out = np.prod(self.shape, dtype=int)
        self.indice = utils.flatten_index(self.shape_in, self.shape_k,
                                          self.pad, self.stride)
        self.dim_k = self.dk * self.hk * self.wk

        # params
        self.w = np.random.normal(0, 1.0 / np.sqrt(self.dim_k),
                                  [self.dim_k, self.dout])
        self.b = np.random.normal(0, 1.0 / np.sqrt(self.dim_k), [1, self.dout])

        # cache
        self.fx = None
        self.dw = np.zeros([self.dim_k, self.dout])
        self.db = np.zeros([1, self.dout])
        self.dw_cache = None
        self.db_cache = None
        return True
Example #4
def user_defined_exons(tmp_sg, line):
    chr, strand = utils.get_chr(line[utils.TARGET]), line[utils.TARGET][0]  # get chr and strand
    upstream_exon = utils.get_pos(line[utils.UPSTREAM_EXON])  # get user-defined flanking exons
    downstream_exon = utils.get_pos(line[utils.DOWNSTREAM_EXON])
    first_primer, second_primer = utils.get_primer_coordinates(line[utils.PRIMER_COORD])

    # get possible exons for primer amplification
    tmp = sorted(tmp_sg.get_graph().nodes(), key=lambda x: (x[0], x[1]))
    first_ex = utils.find_first_exon(first_primer, tmp)
    last_ex = utils.find_last_exon(second_primer, tmp)
    my_exons = tmp[first_ex:last_ex + 1]
    # if tmp_sg.strand == '+':
    #     my_exons = tmp[tmp.index(upstream_exon):tmp.index(downstream_exon) + 1]
    # else:
    #     my_exons = tmp[tmp.index(downstream_exon):tmp.index(upstream_exon) + 1]

    # Use correct tx's and estimate counts/psi
    all_paths = algs.AllPaths(tmp_sg,
                              my_exons,
                              utils.get_pos(line[utils.TARGET]),  # tuple (start, end)
                              chr=chr,
                              strand=strand)
    # all_paths.trim_tx_paths()
    fexon = upstream_exon if strand == "+" else downstream_exon
    lexon = downstream_exon if strand == "+" else upstream_exon
    all_paths.trim_tx_paths_using_primers(first_primer, second_primer, fexon, lexon)
    all_paths.set_all_path_coordinates()
    paths, counts = all_paths.estimate_counts()  # run EM algorithm
    return paths, counts
Example #5
    def test_get_pos(self):
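        # Judging from these cases, get_pos(length, kernel, pad, stride) appears to
        # return the starting offsets of each valid strided window.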
        val = utils.get_pos(3, 2, 0, 1)
        self.assertTrue(np.array_equal(val, [0, 1]))

        val = utils.get_pos(7, 3, 0, 1)
        self.assertTrue(np.array_equal(val, [0, 1, 2, 3, 4]))

        val = utils.get_pos(5, 3, 1, 2)
        self.assertTrue(np.array_equal(val, [0, 2, 4]))
Example #6
    def add_figures_to_image(self, img, left_fit, right_fit):
        """
        Draws information about the center offset and the current lane curvature onto the given image.
        """
        # Calculate curvature
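        # Radius of curvature for a fit x = A*y^2 + B*y + C is
        # (1 + (2*A*y + B)^2)^(3/2) / |2*A|, evaluated here at y_eval.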
        y_eval = 500
        left_curverad = np.absolute(((1 + (2 * left_fit[0] * y_eval + left_fit[1])**2) ** 1.5) \
                        /(2 * left_fit[0]))
        right_curverad = np.absolute(((1 + (2 * right_fit[0] * y_eval + right_fit[1]) ** 2) ** 1.5) \
                         /(2 * right_fit[0]))
        curvature = (left_curverad + right_curverad) / 2
        min_curvature = min(left_curverad, right_curverad)
        vehicle_position = get_pos(719, left_fit, right_fit)
        # Convert from pixels to meters
        vehicle_position = vehicle_position / 12800 * 3.7
        curvature = curvature / 128 * 3.7
        min_curvature = min_curvature / 128 * 3.7

        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(img, 'Radius of Curvature = %d(m)' % curvature, (50, 50),
                    font, 1, (255, 255, 255), 2)
        left_or_right = "left" if vehicle_position < 0 else "right"
        cv2.putText(
            img, 'Vehicle is %.2fm %s of center' %
            (np.abs(vehicle_position), left_or_right), (50, 100), font, 1,
            (255, 255, 255), 2)
        cv2.putText(img, 'Min Radius of Curvature = %d(m)' % min_curvature,
                    (50, 150), font, 1, (255, 255, 255), 2)
Example #7
def get_motifs(model, data_x, data_y, protein_name):
    model.trainable = False
    pos_x = get_pos(data_x, data_y)
    data_x = pos_x.reshape(pos_x.shape[0], pos_x.shape[1], pos_x.shape[2], 1)
    features = get_feature(model=model, data=data_x)
    features = features.reshape(features.shape[0], features.shape[1], features.shape[3])
    if os.path.exists('./' + protein_name + '.fa'):
        os.remove('./' + protein_name + '.fa')
    fp = open('./' + protein_name + '.fa', 'w')

    count = 0
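    # For each sequence/filter pair with enough positive activations, write the 7-mer
    # around each strongly activated position to a FASTA file for motif logo generation.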
    for i in range(features.shape[0]):
        seq = get_seq(data_x[i])
        for j in range(features.shape[1]):
            count_1 = 0
            for k in range(features.shape[2]):
                if features[i][j][k] > 0:
                    count_1 += 1
            if count_1 < 33 + int(len(data_y) / 4000):
                continue
            else:
                for k in range(features.shape[2]):
                    if features[i][j][k] > 0.4:
                        fp.write('>' + 'seq_' + str(i) + '_' + 'filter' + str(j) + '_' + str(k) + '\n')
                        fp.write(seq[j:j + 7] + '\n')
                        count += 1
    fp.close()
    print('count:', count)
    print('{}start get {} logo{}'.format('*' * 10, protein_name, '*' * 10))
    get_logo(protein_name)
    print('{}draw {} logo done{}'.format('*' * 10, protein_name, '*' * 10))
Example #8
def run_js_pos(dataset_name,
               data_dir,
               save_dir,
               models,
               feature_types,
               k_list,
               second=False):
    train_tokens, dev_tokens, train_dev_tokens, test_tokens, \
    train_labels, dev_labels, train_dev_labels, test_labels = utils.load_data(dataset_name)
    train_pos, dev_pos, train_dev_pos, test_pos = utils.get_pos(
        dataset_name, data_dir)
    token_pos_d = als.get_token_pos_d(test_tokens, test_pos)
    # compare with background
    y_data, min_vals, max_vals, y_min_val, y_max_val = [], [], [], 0, 0
    for model_name in models:
        dicts, d_keys = als.create_model_d(save_dir,
                                           model_name,
                                           test_labels=test_labels)
        tmp_y_data = get_jensen_shannon(test_tokens, dicts, d_keys, k_list, 'background', \
                                        combinations=d_keys, token_pos_d=token_pos_d)
        tmp_min_val, tmp_max_val = als.get_min_max(tmp_y_data)
        min_vals.append(tmp_min_val)
        max_vals.append(tmp_max_val)
        y_data.append(tmp_y_data)
    y_min_val = np.min(min_vals) - 0.05
    y_max_val = np.max(max_vals) + 0.05
    simi.show_simi_plot(k_list, y_data, 'Number of important features (k)', 'Jensen-Shannon Score', '', \
                        (13, 12), '', y_min=y_min_val, y_max=y_max_val, if_model=True, second=second, \
                        if_combi=False, if_background=True, if_builtin_posthoc=True)
Example #9
def test(model, test_x, test_y, draw_motifs=None, protein_name='ALKBH5'):
    print('{}begin testing{}'.format('*' * 10, '*' * 10))
    scores = []
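    # NOTE: train_x/train_y are not parameters of test(); they are presumably
    # module-level globals holding the positive support set.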
    pos_support = get_pos(train_x, train_y)

    pos_x = pos_support.reshape(pos_support.shape[0], pos_support.shape[1], pos_support.shape[2], 1)

    for i, test_X in enumerate(test_x):
        repeat_test = test_X.reshape(1, test_X.shape[0], test_X.shape[1], 1).repeat(len(pos_x), axis=0)
        preds = model.predict([repeat_test, pos_x])
        scores.append(np.mean(preds))
    AUCs = roc_auc_score(test_y, scores)
    print("Mean AUC: {}, protein name: {}".format(np.mean(AUCs), protein_name))
    if draw_motifs:
        get_motifs(model, test_x, test_y, protein_name)
Example #10
def save_isforms_and_counts(line, options):
    # get information about each row
    ID, target_coordinate = line[:2]
    strand = target_coordinate[0]
    chr = utils.get_chr(target_coordinate[1:])
    tmp_start, tmp_end = utils.get_pos(target_coordinate)
    logging.debug('Saving isoform and count information for event %s . . .' %
                  ID)

    # get information from GTF annotation
    gene_dict, gene_name = retrieve_gene_information(options, strand, chr,
                                                     tmp_start, tmp_end)

    # get edge weights
    edge_weights_list = [
        sam_obj.extractSamRegion(chr, gene_dict['start'], gene_dict['end'])
        for sam_obj in options['rnaseq']
    ]

    # construct splice graph for each BAM file
    bam_splice_graphs = sg.construct_splice_graph(edge_weights_list,
                                                  gene_dict,
                                                  chr,
                                                  strand,
                                                  options['read_threshold'],
                                                  options['min_jct_count'],
                                                  output_type='list',
                                                  both=options['both_flag'])

    for bam_ix, my_splice_graph in enumerate(bam_splice_graphs):
        # this case is meant for user-defined flanking exons
        if line[utils.PSI_UP] == '-1' and line[utils.PSI_DOWN] == '-1':
            # find path and count information
            paths, counts = user_defined_exons(my_splice_graph, line)

            # filter out single exon paths
            # my_tmp = [(path, count) for path, count in zip(paths, counts) if len(path) > 1]
            # paths, counts = zip(*my_tmp)
        # this case is meant for automatic choice of flanking exons
        else:
            paths, counts = primerseq_defined_exons(my_splice_graph, line,
                                                    options['psi'])
        utils.save_path_info('%s.%d' % (ID, bam_ix),
                             paths,
                             counts,
                             save_dir='tmp/indiv_isoforms/')
    logging.debug(
        'Finished saving isoform and count information for event %s.' % ID)
Example #11
def primerseq_defined_exons(tmp_sg, line, psi_option):
    """
    Get information about counts and paths if using PrimerSeq to define the flanking exons.
    """
    # not the best use of the ExonSeek object, initially intended to find appropriate flanking exons
    # but in this case ExonSeek is used to get the transcripts and associate counts
    ID = line[utils.ID]
    tgt_pos = utils.get_pos(line[utils.TARGET])
    exon_seek_obj = ExonSeek(tgt_pos,
                             tmp_sg,
                             ID,
                             psi_option,
                             None,  # no defined upstream exon
                             None)  # no defined downstream exon
    all_paths, upstream, downstream, component, psi_target, psi_upstream, psi_downstream = exon_seek_obj.get_info()
    return exon_seek_obj.paths, exon_seek_obj.counts
Example #12
 def __init__(self):
     pygame.sprite.Sprite.__init__(self)
     self.cor = get_color()
     self.width,self.height = (40,40)
     self.x,self.y,self.dir = get_pos()
     self.imageMaster = pygame.Surface((self.width,self.height))
     self.imageMaster.fill(self.cor)
     self.imageMaster.set_colorkey((255,255,255))
     self.image = self.imageMaster
     pygame.draw.rect(self.image, self.cor, (self.x,self.y,self.width,self.height))
     self.rect = self.image.get_rect()
     self.rect.x = self.x
     self.rect.y = self.y
     self.speed = random.randrange(1,10)
     self.dx = 0 
     self.dy = 0 
     self.colide = False
Example #13
def primerseq_defined_exons(tmp_sg, line, psi_option):
    """
    Get information about counts and paths if using PrimerSeq to define the flanking exons.
    """
    # not the best use of the ExonSeek object, initially intended to find appropriate flanking exons
    # but in this case ExonSeek is used to get the transcripts and associate counts
    ID = line[utils.ID]
    tgt_pos = utils.get_pos(line[utils.TARGET])
    exon_seek_obj = ExonSeek(
        tgt_pos,
        tmp_sg,
        ID,
        psi_option,
        None,  # no defined upstream exon
        None)  # no defined downstream exon
    all_paths, upstream, downstream, component, psi_target, psi_upstream, psi_downstream = exon_seek_obj.get_info(
    )
    return exon_seek_obj.paths, exon_seek_obj.counts
Example #14
def save_isforms_and_counts(line, options):
    # get information about each row
    ID, target_coordinate = line[:2]
    strand = target_coordinate[0]
    chr = utils.get_chr(target_coordinate[1:])
    tmp_start, tmp_end = utils.get_pos(target_coordinate)
    logging.debug('Saving isoform and count information for event %s . . .' % ID)

    # get information from GTF annotation
    gene_dict, gene_name = retrieve_gene_information(options,
                                                     strand, chr, tmp_start, tmp_end)

    # get edge weights
    edge_weights_list = [sam_obj.extractSamRegion(chr, gene_dict['start'], gene_dict['end'])
                         for sam_obj in options['rnaseq']]

    # construct splice graph for each BAM file
    bam_splice_graphs = sg.construct_splice_graph(edge_weights_list,
                                                  gene_dict,
                                                  chr,
                                                  strand,
                                                  options['read_threshold'],
                                                  options['min_jct_count'],
                                                  output_type='list',
                                                  both=options['both_flag'])

    for bam_ix, my_splice_graph in enumerate(bam_splice_graphs):
        # this case is meant for user-defined flanking exons
        if line[utils.PSI_UP] == '-1' and line[utils.PSI_DOWN] == '-1':
            # find path and count information
            paths, counts = user_defined_exons(my_splice_graph, line)

            # filter out single exon paths
            # my_tmp = [(path, count) for path, count in zip(paths, counts) if len(path) > 1]
            # paths, counts = zip(*my_tmp)
        # this case is meant for automatic choice of flanking exons
        else:
            paths, counts = primerseq_defined_exons(my_splice_graph, line, options['psi'])
        utils.save_path_info('%s.%d' % (ID, bam_ix),
                             paths, counts,
                             save_dir='tmp/indiv_isoforms/')
    logging.debug('Finished saving isoform and count information for event %s.' % ID)
Example #15
def run_pos_percent(dataset_name, data_dir, save_dir, models, feature_types,
                    k):
    train_tokens, dev_tokens, train_dev_tokens, test_tokens, \
    train_labels, dev_labels, train_dev_labels, test_labels = utils.load_data(dataset_name)
    train_pos, dev_pos, train_dev_pos, test_pos = utils.get_pos(
        dataset_name, data_dir)

    pos_types = ['NOUN', 'VERB', 'ADJ', 'ADV', 'PRON', 'DET']
    token_pos_d = als.get_token_pos_d(test_tokens, test_pos)
    vocab_size = len(test_tokens) * k
    min_vals, max_vals, y_min_val, y_max_val = [], [], 0, 0
    for idx, feature_type in enumerate(feature_types):
        dicts, d_keys = als.create_explainer_d(save_dir,
                                               feature_type,
                                               len(test_labels),
                                               test_labels=test_labels)
        tmp_y_data = als.get_combi_pos(d_keys, dicts, test_tokens, k,
                                       token_pos_d, vocab_size)
        tmp_min_val, tmp_max_val = als.get_min_max(tmp_y_data)

        y_min_val = max(tmp_min_val - 1, 0)
        y_max_val = min(tmp_max_val + 1, 100)

        y_data = als.format_pos_data(tmp_y_data, pos_types)
        assert len(y_data) == len(pos_types)
        display_model_names = als.get_explainer_combinations(combi=False)
        display_feature_names = als.get_model_combinations(combi=False)
        x_data = []
        x_data.append('Background')
        for model_name in display_model_names:
            label = '{}'.format(model_name)
            x_data.append(label)

        show_bar_plot(x_data, y_data, '', 'Percentage', \
                      '', (15, 14), '', y_min=y_min_val, \
                      y_max=y_max_val, labels=pos_types)
Example #16
 def choose_offense(self, formation: of.OffenseFormation):
     # how are we ordering this.  Kind of want to do the optional stuff first.  Still just assuming it works
     # which is bad.  Soooo lets go with FB, SLOT2, TE2, SLOT1, TE1, RB1, WR1, WR2, OT1, OT2, OG1, OG2, C, QB
     # Then order like OT, OG, C, OG, OT, WR1, WR2, WR3, WR4, RB1, QB
     # Maybe go with the REC1 REC2 things etc.
     # TODO: Kicking
     # Need to assure no duplicates
     # While True is emulating a do loop
     # This is all overly long and weird, need a better way of doing this
     cur_list = []
     k = get_pos(cur_list,
                 self._pos_rota[Position.K]) if formation.no_k > 0 else None
     cur_list.append(k)
     gnr2 = get_pos(
         cur_list,
         self._pos_rota[Position.GNR]) if formation.no_gnr > 1 else None
     cur_list.append(gnr2)
     gnr1 = get_pos(
         cur_list,
         self._pos_rota[Position.GNR]) if formation.no_gnr > 0 else None
     cur_list.append(gnr1)
     fb = get_pos(
         cur_list,
         self._pos_rota[Position.FB]) if formation.no_rbs > 1 else None
     cur_list.append(fb)
     wr4 = get_pos(
         cur_list,
         self._pos_rota[Position.SLOT]) if formation.no_wrs > 3 else None
     cur_list.append(wr4)
     te2 = get_pos(
         cur_list,
         self._pos_rota[Position.TE]) if formation.no_tes > 1 else None
     cur_list.append(te2)
     wr3 = get_pos(
         cur_list,
         self._pos_rota[Position.SLOT]) if formation.no_wrs > 2 else None
     cur_list.append(wr3)
     te = get_pos(
         cur_list,
         self._pos_rota[Position.TE]) if formation.no_tes > 0 else None
     cur_list.append(te)
     rb = get_pos(
         cur_list,
         self._pos_rota[Position.RB]) if formation.no_rbs > 0 else None
     cur_list.append(rb)
     wr2 = get_pos(
         cur_list,
         self._pos_rota[Position.WR]) if formation.no_wrs > 1 else None
     cur_list.append(wr2)
     wr1 = get_pos(
         cur_list,
         self._pos_rota[Position.WR]) if formation.no_wrs > 0 else None
     cur_list.append(wr1)
     ot2 = get_pos(cur_list, self._pos_rota[Position.OT])
     cur_list.append(ot2)
     ot1 = get_pos(cur_list, self._pos_rota[Position.OT])
     cur_list.append(ot1)
     og2 = get_pos(cur_list, self._pos_rota[Position.OG])
     cur_list.append(og2)
     og1 = get_pos(cur_list, self._pos_rota[Position.OG])
     cur_list.append(og1)
     c = get_pos(cur_list, self._pos_rota[Position.C])
     cur_list.append(c)
     qb = get_pos(
         cur_list,
         self._pos_rota[Position.QB]) if formation.no_k < 1 else None
     # Should eventually be a class really instead of relying on numbers.
     players = [
         ot2, og2, c, og1, ot1, wr1, wr2, wr3, wr4, rb, fb, te, te2, gnr1,
         gnr2, qb, k
     ]
     plyrs = [x for x in players if x is not None]
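      # NOTE: the mapping below assumes exactly 11 entries survive the None filter
      # (a full offensive formation); otherwise the positional indices shift.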
     return {
         GenOff.OT_L: plyrs[0],
         GenOff.OG_L: plyrs[1],
         GenOff.C: plyrs[2],
         GenOff.OG_R: plyrs[3],
         GenOff.OT_R: plyrs[4],
         GenOff.REC1: plyrs[5],
         GenOff.REC2: plyrs[6],
         GenOff.REC3: plyrs[7],
         GenOff.REC4: plyrs[8],
         GenOff.REC5: plyrs[9],
         GenOff.QB: plyrs[10]
     }
Example #17
def main(options, args_output='tmp/debug.json'):
    """
    The gtf main function is the function designed to be called from other
    scripts. It iterates through each target exons and returns the necessary
    information for primer design.
    """
    genome, args_gtf, args_target = options['fasta'], options['gtf'], options[
        'target']

    # the sam object interfaces with the user specified BAM/SAM file!!!
    sam_obj_list = options['rnaseq']

    # iterate through each target exon
    output = []  # output from program
    for line in args_target:  # was line in handle
        name, line = line  # bad style of reassignment
        tgt = line[0]
        strand = tgt[0]
        tmp_start, tmp_end = get_pos(tgt)
        chr = get_chr(tgt[1:])  # [1:] since strand is first character
        USER_DEFINED_FLANKING_EXONS = True if len(line) == 3 else False
        if USER_DEFINED_FLANKING_EXONS:
            up_exon = utils.get_pos(line[1])  # user's upstream exon
            down_exon = utils.get_pos(line[2])  # user's downstream exon
        else:
            up_exon = None  # user did not provide upstream exon
            down_exon = None  # user did not provide downstream exon

        # This try block is to catch assertions made about the graph. If a
        # PrimerSeqError is raised it only impacts a single target for primer
        # design so complete exiting of the program is not warranted.
        try:
            # if the gtf doesn't have a valid gene_id attribute then use
            # the first method otherwise use the second method.
            if options['no_gene_id']:
                gene_dict, gene_name = get_weakly_connected_tx(
                    args_gtf, strand, chr, tmp_start,
                    tmp_end)  # hopefully filter out junk
            else:
                gene_dict, gene_name = get_from_gtf_using_gene_name(
                    args_gtf, strand, chr, tmp_start, tmp_end)

            # extract all edge weights only once
            edge_weights_list = [
                sam_obj.extractSamRegion(chr, gene_dict['start'],
                                         gene_dict['end'])
                for sam_obj in sam_obj_list
            ]

            # The following options['both_flag'] determines how the splice graph is constructed.
            # The splice graph can be either constructed from annotation junctions
            # where options['both_flag']==False or RNA-Seq + annotation junctions when
            # options['both_flag']==True.

            # single pooled count data splice graph
            splice_graph = construct_splice_graph(edge_weights_list,
                                                  gene_dict,
                                                  chr,
                                                  strand,
                                                  options['read_threshold'],
                                                  options['min_jct_count'],
                                                  output_type='single',
                                                  both=options['both_flag'])
            # Second, get a splice graph for each BAM file
            single_bam_splice_graphs = construct_splice_graph(
                edge_weights_list,
                gene_dict,
                chr,
                strand,
                options['read_threshold'],
                options['min_jct_count'],
                output_type='list',
                both=options['both_flag'])

            ### Logic for choosing methodology of primer design ###
            # user-defined flanking exon case
            if up_exon and down_exon:
                if gene_dict['target'] not in gene_dict['exons']:
                    raise utils.PrimerSeqError(
                        'Error: target exon was not found in gtf annotation')
                elif up_exon not in gene_dict['exons']:
                    raise utils.PrimerSeqError(
                        'Error: upstream exon not in gtf annotation')
                elif down_exon not in gene_dict['exons']:
                    raise utils.PrimerSeqError(
                        'Error: downstream exon not in gtf annotation')
                tmp = predefined_exons_case(
                    name,  # ID for exon (need to save as json)
                    gene_dict['target'],  # target exon tuple (start, end)
                    splice_graph,  # SpliceGraph object
                    genome,  # pygr genome variable
                    up_exon,  # upstream flanking exon
                    down_exon)  # downstream flanking exon
            # always included case
            elif options['psi'] > .9999:
                # note this function ignores edge weights
                tmp = get_flanking_biconnected_exons(tgt, gene_dict['target'],
                                                     splice_graph, genome)
            # user specified a sufficient psi value to call constitutive exons
            else:
                tmp = get_sufficient_psi_exons(
                    tgt, gene_dict['target'], splice_graph, genome, name,
                    options['psi'], up_exon,
                    down_exon)  # note, this function utilizes edge weights
            ### End methodology specific primer design ###

            # Error msgs are of length one, so only do psi calculations for
            # non-error msgs
            if len(tmp) > 1:
                # edit target psi value
                tmp_all_paths = tmp[
                    -4]  # CAREFUL the index for the AllPaths object may change
                tmp[2] = calculate_target_psi(gene_dict['target'],
                                              single_bam_splice_graphs,
                                              tmp_all_paths.component,
                                              up_exon=None,
                                              down_exon=None)
                # up_exon=up_exon,
                # down_exon=down_exon)  # CAREFUL index for psi_target may change
                tmp.append(gene_name)

            # append result to output list
            output.append(tmp)
        except (utils.PrimerSeqError, ):
            t, v, trace = sys.exc_info()
            output.append([str(v)])  # just append assertion msg

    return output
Example #18
 def choose_defense(self, formation: df.DefenseFormation) -> list:
     cur_list = []
     dime = get_pos(
         cur_list,
         self._pos_rota[Position.DIME]) if formation.no_cb > 3 else None
     cur_list.append(dime)
     nick = get_pos(
         cur_list,
         self._pos_rota[Position.NICKEL]) if formation.no_cb > 2 else None
     cur_list.append(nick)
     mlb2 = get_pos(
         cur_list,
         self._pos_rota[Position.MLB]) if formation.no_lb > 3 else None
     cur_list.append(mlb2)
     olb1 = get_pos(
         cur_list,
         self._pos_rota[Position.OLB]) if formation.no_lb > 1 else None
     cur_list.append(olb1)
     olb2 = get_pos(
         cur_list,
         self._pos_rota[Position.OLB]) if formation.no_lb > 2 else None
     cur_list.append(olb2)
     dt2 = get_pos(
         cur_list,
         self._pos_rota[Position.DT]) if formation.no_dl > 3 else None
     cur_list.append(dt2)
     de2 = get_pos(cur_list, self._pos_rota[Position.DE])
     cur_list.append(de2)
     de1 = get_pos(cur_list, self._pos_rota[Position.DE])
     cur_list.append(de1)
     dt1 = get_pos(cur_list, self._pos_rota[Position.DT])
     cur_list.append(dt1)
     mlb1 = get_pos(cur_list, self._pos_rota[Position.MLB])
     cur_list.append(mlb1)
     cb1 = get_pos(cur_list, self._pos_rota[Position.CB])
     cur_list.append(cb1)
     cb2 = get_pos(cur_list, self._pos_rota[Position.CB])
     cur_list.append(cb2)
     sf1 = get_pos(cur_list, self._pos_rota[Position.SF])
     cur_list.append(sf1)
     sf2 = get_pos(cur_list, self._pos_rota[Position.SF])
     cur_list.append(sf2)
     players = [
         de1, dt1, dt2, de2, mlb1, mlb2, olb1, olb2, nick, dime, cb1, cb2,
         sf1, sf2
     ]
     return [x for x in players if x is not None]
Example #19
def primer3(options, primer3_options):
    """
    The primer.py main function uses the gtf module to find information about constitutive flanking exons for the target exons of interest.
    It then designs primers by calling primer3. Next it parses the primer3 output and outputs the final results to a file. The output file
    is then emailed to the designated address in the command line parameters.
    """

    # tmp directory
    mkdir_tmp()  # make any necessary tmp directories

    # find flanking exons
    logging.debug('Calling splice_graph.main to find flanking exons')
    flanking_info = splice_graph.main(options)
    logging.debug('Finished splice_graph.main')

    # iterate over all target sequences
    STRAND, EXON_TARGET, PSI_TARGET, UPSTREAM_TARGET, PSI_UPSTREAM, DOWNSTREAM_TARGET, PSI_DOWNSTREAM, ALL_PATHS, UPSTREAM_Seq, TARGET_SEQ, DOWNSTREAM_SEQ, GENE_NAME = range(12)
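    # The names above are column indices into each flanking_info record.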
    output_list = []
    for z in range(len(flanking_info)):
        jobs_ID = str(z+1)  # base file name for primer3 output
        # no flanking exon information case
        if len(flanking_info[z]) == 1:
            logging.debug(flanking_info[z][0])
            output_list.append(flanking_info[z])  # write problem msg
        # has flanking exon information case
        else:
            genome_chr = options['fasta'][flanking_info[z][ALL_PATHS].chr]
            tar = options['target'][z][1][0]  # flanking_info[z][1]  # target interval (used for print statements)
            tar_id = options['target'][z][0]
            ####################### Primer3 Parameter Configuration###########
            P3_FILE_FLAG = '1'
            PRIMER_EXPLAIN_FLAG = '1'
            PRIMER_THERMODYNAMIC_PARAMETERS_PATH = os.path.join(config_options['primer3'],
                                                                'src/primer3_config/')
            SEQUENCE_ID = tar  # use the 'chr:start-stop' format for the sequence ID in primer3
            #SEQUENCE_TEMPLATE = flanking_info[z][UPSTREAM_Seq] + flanking_info[z][TARGET_SEQ].lower() + flanking_info[z][DOWNSTREAM_SEQ]
            #SEQUENCE_TARGET = str(len(flanking_info[z][UPSTREAM_Seq]) + 1) + ',' + str(len(flanking_info[z][TARGET_SEQ]))
            # SEQUENCE_PRIMER_PAIR_OK_REGION_LIST = '0,' + str(len(flanking_info[z][UPSTREAM_Seq])) + ',' + str(len(flanking_info[z][UPSTREAM_Seq]) + len(flanking_info[z][TARGET_SEQ])) + ',' + str(len(flanking_info[z][DOWNSTREAM_SEQ]))
            if options['short_isoform']:
                # this option uses the shortest available isoform for designing primers
                shortest_isoform = flanking_info[z][ALL_PATHS].get_shortest_path()
                middle_sequence = utils.get_seq_from_list(genome_chr,
                                                          flanking_info[z][STRAND],
                                                          shortest_isoform[1:-1])
                SEQUENCE_TEMPLATE = '%s%s%s' % (str(flanking_info[z][UPSTREAM_Seq]).upper(),
                                                middle_sequence,
                                                str(flanking_info[z][DOWNSTREAM_SEQ]).upper())
                SEQUENCE_PRIMER_PAIR_OK_REGION_LIST = '0,%d,%d,%d' % (len(flanking_info[z][UPSTREAM_Seq]),
                                                                      len(flanking_info[z][UPSTREAM_Seq]) + len(middle_sequence),
                                                                      len(flanking_info[z][DOWNSTREAM_SEQ]))
                middle_pos = (0, len(middle_sequence))
            else:
                # this uses upstream flanking exon, target exon, and downstream flanking exon to design primers
                SEQUENCE_TEMPLATE = '%s%s%s' % (str(flanking_info[z][UPSTREAM_Seq]).upper(),
                                                str(flanking_info[z][TARGET_SEQ]).lower(),
                                                str(flanking_info[z][DOWNSTREAM_SEQ]).upper())
                SEQUENCE_PRIMER_PAIR_OK_REGION_LIST = '0,%d,%d,%d' % (len(flanking_info[z][UPSTREAM_Seq]),
                                                                      len(flanking_info[z][UPSTREAM_Seq]) + len(flanking_info[z][TARGET_SEQ]),
                                                                      len(flanking_info[z][DOWNSTREAM_SEQ]))
                middle_pos = utils.get_pos(flanking_info[z][EXON_TARGET])
            #############################################################

            ####################### Write jobs_ID.conf##################
            with open(os.path.join(config_options['tmp'], jobs_ID + '.conf'), 'w') as outfile:
                # hard coded options
                outfile.write('SEQUENCE_ID=' + SEQUENCE_ID + '\n')
                outfile.write('SEQUENCE_TEMPLATE=' + SEQUENCE_TEMPLATE + '\n')
                #outfile.write('SEQUENCE_TARGET=' + SEQUENCE_TARGET + '\n')
                outfile.write('SEQUENCE_PRIMER_PAIR_OK_REGION_LIST=' + SEQUENCE_PRIMER_PAIR_OK_REGION_LIST + '\n')
                outfile.write('P3_FILE_FLAG=' + P3_FILE_FLAG + '\n')
                outfile.write('PRIMER_EXPLAIN_FLAG=' + PRIMER_EXPLAIN_FLAG + '\n')
                outfile.write('PRIMER_THERMODYNAMIC_PARAMETERS_PATH=' + PRIMER_THERMODYNAMIC_PARAMETERS_PATH + '\n')  # make sure primer3 finds the config files

                # options from primer3.cfg
                for o in primer3_options:
                    outfile.write(o)
                outfile.write('=' + '\n')  # primer3 likes a '=' at the end of sequence params
                logging.debug('Wrote the input file (%s) for primer3' % (config_options['tmp'] + '/' + jobs_ID + '.conf'))

            ###################### Primer3 #####################################
            if os.path.exists(os.path.join(config_options['tmp'], jobs_ID + '.Primer3')):
                os.remove(os.path.join(config_options['tmp'], jobs_ID + '.Primer3'))  # delete old files

            call_primer3(tar, jobs_ID)  # command line call to Primer3!
            shutil.copy(os.path.join(config_options['tmp'], jobs_ID + '.Primer3'),
                        config_options['primer3_log'])  # copy primer3 results
            shutil.copy(os.path.join(config_options['tmp'], jobs_ID + '.conf'),
                        config_options['primer3_log'])  # copy config file

            #################### Parse '.Primer3' ################################
            primer3_dict = read_primer3(config_options['tmp'] + '/' + jobs_ID + '.Primer3')

            # checks if no output
            if 'PRIMER_LEFT_0_SEQUENCE' not in primer3_dict:
                str_params = (tar, os.path.abspath(os.path.join(config_options['primer3_log'], str(jobs_ID) + '.Primer3')))
                primer3_problem = 'No Primer3 results for %s. Check %s for more details.' % str_params
                logging.debug(primer3_problem)
                output_list.append([primer3_problem])
                continue
            # there is output case
            else:
                logging.debug('There are primer3 results for %s' % SEQUENCE_ID)
                # get info about product sizes
                target_exon_len = len(flanking_info[z][TARGET_SEQ])
                Primer3_PRIMER_PRODUCT_SIZE = int(primer3_dict['PRIMER_PAIR_0_PRODUCT_SIZE']) - target_exon_len
                primer3_coords = primer_coordinates(primer3_dict, flanking_info[z][STRAND], flanking_info[z][ALL_PATHS].chr,
                                                    # utils.get_pos(flanking_info[z][EXON_TARGET]),
                                                    # (0, len(middle_sequence)),
                                                    middle_pos,  # either the target exon or a dummy pos for using shortest isoform
                                                    utils.get_pos(flanking_info[z][UPSTREAM_TARGET]),
                                                    utils.get_pos(flanking_info[z][DOWNSTREAM_TARGET]),
                                                    use_target=True)
                flanking_info[z][ALL_PATHS].set_all_path_lengths(map(utils.get_pos, primer3_coords.split(';')))
                skipping_size_list = flanking_info[z][ALL_PATHS].skip_lengths
                inclusion_size_list = flanking_info[z][ALL_PATHS].inc_lengths
                skipping_size = ';'.join(map(str, filter(lambda x: x>0, skipping_size_list)))
                inclusion_size = ';'.join(map(str, filter(lambda x: x>0, inclusion_size_list)))
                # left_seq = Sequence(primer3_dict['PRIMER_LEFT_0_SEQUENCE'], 'left')
                # right_seq = Sequence(primer3_dict['PRIMER_RIGHT_0_SEQUENCE'], 'right')
                # forward_seq, reverse_seq = (-right_seq, -left_seq) if str(flanking_info[z][STRAND]) == '-' else (left_seq, right_seq)   # reverse complement sequence
                my_strand = flanking_info[z][STRAND]
                forward_pos, reverse_pos = map(utils.get_pos, primer3_coords.split(';')) if flanking_info[z][STRAND] == '+' else map(utils.get_pos, reversed(primer3_coords.split(';')))
                forward_seq = genome_chr[forward_pos[0]:forward_pos[1]] if my_strand == '+' else -genome_chr[forward_pos[0]:forward_pos[1]]
                reverse_seq = -genome_chr[reverse_pos[0]:reverse_pos[1]] if my_strand == '+' else genome_chr[reverse_pos[0]:reverse_pos[1]]
                asm_region = '%s:%d-%d' % (flanking_info[z][ALL_PATHS].chr,
                                           flanking_info[z][ALL_PATHS].asm_component[0][0],
                                           flanking_info[z][ALL_PATHS].asm_component[-1][1])

                # append results to output_list
                tmp = [tar_id, tar, primer3_coords, flanking_info[z][PSI_TARGET], str(forward_seq).upper(), str(reverse_seq).upper(),
                       str((float(primer3_dict['PRIMER_LEFT_0_TM']) + float(primer3_dict['PRIMER_RIGHT_0_TM'])) / 2), skipping_size, inclusion_size,
                       flanking_info[z][UPSTREAM_TARGET], flanking_info[z][PSI_UPSTREAM], flanking_info[z][DOWNSTREAM_TARGET],
                       flanking_info[z][PSI_DOWNSTREAM], asm_region, flanking_info[z][GENE_NAME]]
                output_list.append(tmp)

    # write output information
    with open(options['user_output'], 'wb') as outputfile_tab, open(options['output'], 'wb') as tmp_output:
        # define csv header
        header = ['ID', 'target coordinate', 'primer coordinates', 'PSI target', 'forward primer', 'reverse primer', 'average TM',
                  'skipping product size', 'inclusion product size', 'upstream exon coordinate', 'PSI upstream',
                  'downstream exon coordinate', 'PSI downstream', 'ASM Region', 'Gene']
        output_list = [header] + output_list  # pre-pend header to output file
        csv.writer(outputfile_tab, dialect='excel', delimiter='\t').writerows(output_list)  # output primer design to a tab delimited file
        csv.writer(tmp_output, dialect='excel', delimiter='\t').writerows(output_list)  # output primer design to a tmp file location
Example #20
do_draw = True
do_perf = False

"""Training images"""
pos_path = r'/home/skynet/Datasets/PeopleDataset/Positive'
neg_path = r'/home/skynet/Datasets/PeopleDataset/Negative'

"""Test videos"""
frm_path = r'/home/skynet/Datasets/Crowd_PETS09/S2/L1/Time_12-34/View_008'
# frm_path = r'/home/skynet/Datasets/Crowd_PETS09/S2/L1/Time_12-34/View_007'
# frm_path = r'/home/skynet/Datasets/Crowd_PETS09/S2/L1/Time_12-34/View_006'
# frm_path = r'/home/skynet/Datasets/Crowd_PETS09/S2/L1/Time_12-34/View_005'
# frm_path = r'/home/skynet/Datasets/Crowd_PETS09/S2/L1/Time_12-34/View_001'

"""Get positive and negative examples"""
pos_list, pos_lab = utils.get_pos(pos_path)
# neg_list, neg_lab = utils.get_neg_fix(neg_path)
neg_list, neg_lab = utils.get_neg_rnd(neg_path)
del pos_path, neg_path

"""Initialize HOG"""
hog_obj = utils.load_hog('hog.xml')
hog_list = utils.get_hog(pos_list, pos_lab, neg_list, neg_lab, hog_obj)
del pos_list, pos_lab, neg_list, neg_lab
# hog_par = {'winStride': (8, 8), 'padding': (0, 0), 'scale': 1.2}
# hog_par = {'hitThreshold': 1.2, 'winStride': (8, 8), 'padding': (0, 0), 'scale': 1.2, 'finalThreshold': 4}
hog_par = {'hitThreshold': 1.4, 'winStride': (8, 8), 'padding': (0, 0), 'scale': 1.2, 'finalThreshold': 2}

"""Initialize and train the SVM"""
x, y = utils.get_data(hog_list)
del hog_list
Example #21
    toponym_to_find = " ".join(sys.argv[1:])

    geocoder_api_server = "http://geocode-maps.yandex.ru/1.x/"

    geocoder_params = {
        "apikey": "40d1649f-0493-4b70-98ba-98533de7710b",
        "geocode": toponym_to_find,
        "format": "json"}

    response = requests.get(geocoder_api_server, params=geocoder_params)

    if not response:
        exit(-1)

    json_response = response.json()

    pos = get_pos(json_response)
    map_params = {
        "ll": ",".join(pos),
        "spn": ",".join(get_size(json_response)),
        "l": "map",
        "pt": f"{pos[0]},{pos[1]},flag"
    }

    map_api_server = "http://static-maps.yandex.ru/1.x/"

    response = requests.get(map_api_server, params=map_params)

    Image.open(BytesIO(response.content)).show()
Example #22
def main(options, args_output='tmp/debug.json'):
    """
    The gtf main function is the function designed to be called from other
    scripts. It iterates through each target exons and returns the necessary
    information for primer design.
    """
    genome, args_gtf, args_target = options['fasta'], options['gtf'], options['target']

    # the sam object interfaces with the user specified BAM/SAM file!!!
    sam_obj_list = options['rnaseq']

    # iterate through each target exon
    output = []  # output from program
    for line in args_target:  # was line in handle
        name, line = line  # bad style of reassignment
        tgt = line[0]
        strand = tgt[0]
        tmp_start, tmp_end = get_pos(tgt)
        chr = get_chr(tgt[1:])  # [1:] since strand is first character
        USER_DEFINED_FLANKING_EXONS = True if len(line) == 3 else False
        if USER_DEFINED_FLANKING_EXONS:
            up_exon = utils.get_pos(line[1])  # user's upstream exon
            down_exon = utils.get_pos(line[2])  # user's downstream exon
        else:
            up_exon = None  # user did not provide upstream exon
            down_exon = None  # user did not provide downstream exon

        # This try block is to catch assertions made about the graph. If a
        # PrimerSeqError is raised it only impacts a single target for primer
        # design so complete exiting of the program is not warranted.
        try:
            # if the gtf doesn't have a valid gene_id attribute then use
            # the first method otherwise use the second method.
            if options['no_gene_id']:
                gene_dict, gene_name = get_weakly_connected_tx(args_gtf, strand, chr, tmp_start, tmp_end)  # hopefully filter out junk
            else:
                gene_dict, gene_name = get_from_gtf_using_gene_name(args_gtf, strand, chr, tmp_start, tmp_end)

            # extract all edge weights only once
            edge_weights_list = [sam_obj.extractSamRegion(chr, gene_dict['start'], gene_dict['end'])
                                 for sam_obj in sam_obj_list]

            # The following options['both_flag'] determines how the splice graph is constructed.
            # The splice graph can be either constructed from annotation junctions
            # where options['both_flag']==False or RNA-Seq + annotation junctions when
            # options['both_flag']==True.

            # single pooled count data splice graph
            splice_graph = construct_splice_graph(edge_weights_list,
                                                  gene_dict,
                                                  chr,
                                                  strand,
                                                  options['read_threshold'],
                                                  options['min_jct_count'],
                                                  output_type='single',
                                                  both=options['both_flag'])
            # Second, get a splice graph for each BAM file
            single_bam_splice_graphs = construct_splice_graph(edge_weights_list,
                                                              gene_dict,
                                                              chr,
                                                              strand,
                                                              options['read_threshold'],
                                                              options['min_jct_count'],
                                                              output_type='list',
                                                              both=options['both_flag'])

            ### Logic for choosing methodology of primer design ###
            # user-defined flanking exon case
            if up_exon and down_exon:
                if gene_dict['target'] not in gene_dict['exons']:
                    raise utils.PrimerSeqError('Error: target exon was not found in gtf annotation')
                elif up_exon not in gene_dict['exons']:
                    raise utils.PrimerSeqError('Error: upstream exon not in gtf annotation')
                elif down_exon not in gene_dict['exons']:
                    raise utils.PrimerSeqError('Error: downstream exon not in gtf annotation')
                tmp = predefined_exons_case(name,  # ID for exon (need to save as json)
                                            gene_dict['target'],  # target exon tuple (start, end)
                                            splice_graph,  # SpliceGraph object
                                            genome,  # pygr genome variable
                                            up_exon,  # upstream flanking exon
                                            down_exon)  # downstream flanking exon
            # always included case
            elif options['psi'] > .9999:
                # note this function ignores edge weights
                tmp = get_flanking_biconnected_exons(tgt, gene_dict['target'],
                                                     splice_graph,
                                                     genome)
            # user specified a sufficient psi value to call constitutive exons
            else:
                tmp = get_sufficient_psi_exons(tgt, gene_dict['target'],
                                               splice_graph,
                                               genome,
                                               name,
                                               options['psi'],
                                               up_exon,
                                               down_exon)  # note, this function utilizes edge weights
            ### End methodology specific primer design ###

            # Error msgs are of length one, so only do psi calculations for
            # non-error msgs
            if len(tmp) > 1:
                # edit target psi value
                tmp_all_paths = tmp[-4]  # CAREFUL the index for the AllPaths object may change
                tmp[2] = calculate_target_psi(gene_dict['target'],
                                              single_bam_splice_graphs,
                                              tmp_all_paths.component,
                                              up_exon=None,
                                              down_exon=None)
                                              # up_exon=up_exon,
                                              # down_exon=down_exon)  # CAREFUL index for psi_target may change
                tmp.append(gene_name)

            # append result to output list
            output.append(tmp)
        except (utils.PrimerSeqError,):
            t, v, trace = sys.exc_info()
            output.append([str(v)])  # just append assertion msg

    return output
Example #23
def read_depth_plot(my_bigwigs, output, options):
    if isinstance(options['position'], list):
        chr = utils.get_chr(options['position'][0])
        start, stop = zip(*map(lambda x: utils.get_pos(x), options['position']))
    else:
        chr = utils.get_chr(options['position'])
        start, stop = utils.get_pos(options['position'])
    bigwigs = my_bigwigs.split(',')
    num_subplots = len(bigwigs)  # num of bam files equals number of subplots
    fig, axes = plt.subplots(num_subplots, 1, sharex=True, sharey=True, figsize=(6, options['size'] * num_subplots))
    gray = (0.9, 0.9, 0.9)

    # iterate over subplots (bigwig files)
    max_count_holder = 0
    if num_subplots == 1:
        # axes.set_title('Read Depth Plot on %s' % chr)
        iterable = [axes]
    else:
        # axes.flat[0].set_title('Read Depth Plot on %s' % chr)
        iterable = axes.flat
    for i, ax in enumerate(iterable):
        #ax.locator_params(nbins=2)
        ax.yaxis.set_label_text('')

        # set bg
        ax.patch.set_facecolor(gray)
        ax.patch.set_edgecolor(gray)
        ax.grid()

        # plot/label
        max_count, real_start, real_stop = generate_plot(ax, bigwigs[i], chr, start, stop, options)  # does the actual work
        draw_text(ax, '%s -- ' % options['gene'] + os.path.splitext(os.path.basename(bigwigs[i]))[0])

        # format options
        ax.xaxis.grid(color='white', linestyle='--', linewidth=1.5)
        ax.yaxis.grid(color='white', linestyle='--', linewidth=1.5)
        ax.xaxis.set_major_formatter(DropFormatter())
        ax.yaxis.set_major_formatter(DropFormatter())
        ax.set_axisbelow(True)

        # hide some ugly lines
        for line in ax.xaxis.get_ticklines() + ax.yaxis.get_ticklines():
            line.set_color(gray)

        # set y-axis
        if max_count > max_count_holder:
            ax.set_ylim(0, 1.5 * max_count)
            ax.set_yticks([0, int( .375 * max_count ), int( .75 * max_count ), int( 1.125 * max_count ), int(1.5 * max_count)])
            max_count_holder = max_count

        # set x-axis options
        ax.set_xlim(real_start, real_stop)     # set x limits
        ax.set_xticks([real_start, real_stop])   # explicitly set ticks
        ax.xaxis.set_ticklabels(map(addCommas, [real_start, real_stop]))   # make nice looking text for labels
        ax.get_xticklabels()[0].set_horizontalalignment('left')
        ax.get_xticklabels()[1].set_horizontalalignment('right')

        # make text box to display chromosome information
        if i == num_subplots - 1:
            offset_text(ax, '%s:' % chr, 3, (-.15, -.17))

        # adjust spacing between subplots
        fig.subplots_adjust(wspace=0.05, hspace=0.05, bottom=.12)

        # save figure
        plt.savefig(output)
Example #24
def read_depth_plot(my_bigwigs, output, options):
    if isinstance(options['position'], list):
        chr = utils.get_chr(options['position'][0])
        start, stop = zip(
            *map(lambda x: utils.get_pos(x), options['position']))
    else:
        chr = utils.get_chr(options['position'])
        start, stop = utils.get_pos(options['position'])
    bigwigs = my_bigwigs.split(',')
    num_subplots = len(bigwigs)  # num of bam files equals number of subplots
    fig, axes = plt.subplots(num_subplots,
                             1,
                             sharex=True,
                             sharey=True,
                             figsize=(6, options['size'] * num_subplots))
    gray = (0.9, 0.9, 0.9)

    # iterate over subplots (bigwig files)
    max_count_holder = 0
    if num_subplots == 1:
        # axes.set_title('Read Depth Plot on %s' % chr)
        iterable = [axes]
    else:
        # axes.flat[0].set_title('Read Depth Plot on %s' % chr)
        iterable = axes.flat
    for i, ax in enumerate(iterable):
        #ax.locator_params(nbins=2)
        ax.yaxis.set_label_text('')

        # set bg
        ax.patch.set_facecolor(gray)
        ax.patch.set_edgecolor(gray)
        ax.grid()

        # plot/label
        max_count, real_start, real_stop = generate_plot(
            ax, bigwigs[i], chr, start, stop, options)  # does the actual work
        draw_text(
            ax, '%s -- ' % options['gene'] +
            os.path.splitext(os.path.basename(bigwigs[i]))[0])

        # format options
        ax.xaxis.grid(color='white', linestyle='--', linewidth=1.5)
        ax.yaxis.grid(color='white', linestyle='--', linewidth=1.5)
        ax.xaxis.set_major_formatter(DropFormatter())
        ax.yaxis.set_major_formatter(DropFormatter())
        ax.set_axisbelow(True)

        # hide some ugly lines
        for line in ax.xaxis.get_ticklines() + ax.yaxis.get_ticklines():
            line.set_color(gray)

        # set y-axis
        if max_count > max_count_holder:
            ax.set_ylim(0, 1.5 * max_count)
            ax.set_yticks([
                0,
                int(.375 * max_count),
                int(.75 * max_count),
                int(1.125 * max_count),
                int(1.5 * max_count)
            ])
            max_count_holder = max_count

        # set x-axis options
        ax.set_xlim(real_start, real_stop)  # set x limits
        ax.set_xticks([real_start, real_stop])  # explicitly set ticks
        ax.xaxis.set_ticklabels(
            map(addCommas,
                [real_start, real_stop]))  # make nice looking text for labels
        ax.get_xticklabels()[0].set_horizontalalignment('left')
        ax.get_xticklabels()[1].set_horizontalalignment('right')

        # make text box to display chromosome information
        if i == num_subplots - 1:
            offset_text(ax, '%s:' % chr, 3, (-.15, -.17))

        # adjust spacing between subplots
        fig.subplots_adjust(wspace=0.05, hspace=0.05, bottom=.12)

        # save figure
        plt.savefig(output)
Example #25
import numpy as np
import utils
"""Enable drawing and performances"""
do_draw = True
do_perf = False
"""Training images"""
pos_path = r'/home/skynet/Datasets/PeopleDataset/Positive'
neg_path = r'/home/skynet/Datasets/PeopleDataset/Negative'
"""Test videos"""
frm_path = r'/home/skynet/Datasets/Crowd_PETS09/S2/L1/Time_12-34/View_008'
# frm_path = r'/home/skynet/Datasets/Crowd_PETS09/S2/L1/Time_12-34/View_007'
# frm_path = r'/home/skynet/Datasets/Crowd_PETS09/S2/L1/Time_12-34/View_006'
# frm_path = r'/home/skynet/Datasets/Crowd_PETS09/S2/L1/Time_12-34/View_005'
# frm_path = r'/home/skynet/Datasets/Crowd_PETS09/S2/L1/Time_12-34/View_001'
"""Get positive and negative examples"""
pos_list, pos_lab = utils.get_pos(pos_path)
# neg_list, neg_lab = utils.get_neg_fix(neg_path)
neg_list, neg_lab = utils.get_neg_rnd(neg_path)
del pos_path, neg_path
"""Initialize HOG"""
hog_obj = utils.load_hog('hog.xml')
hog_list = utils.get_hog(pos_list, pos_lab, neg_list, neg_lab, hog_obj)
del pos_list, pos_lab, neg_list, neg_lab
# hog_par = {'winStride': (8, 8), 'padding': (0, 0), 'scale': 1.2}
# hog_par = {'hitThreshold': 1.2, 'winStride': (8, 8), 'padding': (0, 0), 'scale': 1.2, 'finalThreshold': 4}
hog_par = {
    'hitThreshold': 1.4,
    'winStride': (8, 8),
    'padding': (0, 0),
    'scale': 1.2,
    'finalThreshold': 2