Esempio n. 1
0
def generate_inference_data(args):
    """
    Build the inference examples for a single region.

    Parameters
    ----------
    args : tuple
        A ``(reads_path, ref, region)`` triple, unpacked in that order.
        ``reads_path`` is the path to the aligned reads file, ``ref`` is the
        reference sequence, and ``region`` must expose ``name``, ``start``
        and ``end``.

    Returns
    -------
    region_name : ``region.name``
    positions : list of the position entries produced for the region
    examples : list of the example entries, parallel to ``positions``
    """
    reads_path, ref, region = args

    # The query uses ``start + 1`` while the log line below uses ``start``;
    # presumably the generator takes 1-based coordinates -- TODO confirm.
    query = f'{region.name}:{region.start + 1}-{region.end}'
    features = gen.generate_features(reads_path, ref, query)

    # Pair the parallel sequences once, then split them into two lists.
    pairs = list(zip(*features))
    positions = [pos for pos, _ in pairs]
    examples = [example for _, example in pairs]

    print(f'>> finished generating examples for {region.name}:{region.start}-{region.end}')
    return region.name, positions, examples
Esempio n. 2
0
def generate_train(args):
    """
    Generate labelled training examples for one region.

    Parameters
    ----------
    args : tuple
        A ``(bam_X, bam_Y, ref, region)`` tuple, unpacked in that order.
        ``bam_X`` is handed to the feature generator, ``bam_Y`` is the source
        of the alignments the labels are derived from, ``ref`` is passed to
        the label helper (and, stringified, to the feature generator), and
        ``region`` must expose ``name``, ``start`` and ``end``.

    Returns
    -------
    None
        When no alignment survives filtering.
    tuple
        ``(region.name, positions, examples, labels)`` otherwise; the three
        lists are parallel, one entry per kept feature window.

    Raises
    ------
    KeyError
        If a window contains a position ``p`` with ``p[1] == 0`` that has no
        label and is not marked as unknown.
    """
    bam_X, bam_Y, ref, region = args

    alignments = get_aligns(bam_Y, ref_name=region.name, start=region.start, end=region.end)
    filtered = filter_aligns(alignments)

    print(f'Finished generating labels for {region.name}:{region.start}-{region.end}.')

    if not filtered:
        print('No alignments.')
        return None

    positions, examples, labels = [], [], []

    for a in filtered:
        pos_labels = {}
        n_pos = set()  # positions whose label is ENCODED_UNKNOWN

        t_pos, t_labels = get_pos_and_labels(a, ref, region)
        for p, label in zip(t_pos, t_labels):
            if label == ENCODED_UNKNOWN:
                n_pos.add(p)
            else:
                pos_labels[p] = label

        if not pos_labels:
            # No labelled position in this alignment: there is no span to
            # request features for, and indexing an empty list below would
            # raise IndexError and abort the whole region.
            continue

        # Only the extreme labelled positions are needed, so a full sort is
        # unnecessary. Keys are indexable positions whose first item is the
        # coordinate used in the query string.
        first, last = min(pos_labels), max(pos_labels)
        region_string = f'{region.name}:{first[0] + 1}-{last[0]}'

        result = gen.generate_features(bam_X, str(ref), region_string)

        for P, X in zip(*result):
            Y = []

            for p in P:
                assert is_in_region(p[0], filtered)

                if p in n_pos:
                    # A window touching an unknown label is dropped entirely.
                    break

                if p in pos_labels:
                    Y.append(pos_labels[p])
                elif p[1] != 0:
                    # Unlabelled position with a non-zero second component
                    # is labelled as a gap.
                    Y.append(encoding[GAP])
                else:
                    raise KeyError(f'No label mapping for position {p}.')
            else:
                # Reached only when the inner loop did not break.
                positions.append(P)
                examples.append(X)
                labels.append(Y)

    print(f'Finished generating examples for {region.name}:{region.start}-{region.end}.')
    return region.name, positions, examples, labels
Esempio n. 3
0
def generate_infer(args):
    """
    Build the inference examples for a single region.

    Parameters
    ----------
    args : tuple
        A ``(bam_X, ref, region)`` triple, unpacked in that order. ``bam_X``
        and ``ref`` are forwarded to the feature generator; ``region`` must
        expose ``name``, ``start`` and ``end``.

    Returns
    -------
    tuple
        ``(region.name, positions, examples, None)``. The trailing ``None``
        fills the slot where labels would go; there are none at inference.
    """
    bam_X, ref, region = args

    # The query uses ``start + 1`` while the log line below uses ``start``;
    # presumably the generator takes 1-based coordinates -- TODO confirm.
    query = f'{region.name}:{region.start+1}-{region.end}'
    features = gen.generate_features(bam_X, ref, query)

    # Pair the parallel sequences once, then split them into two lists.
    pairs = list(zip(*features))
    positions = [pos for pos, _ in pairs]
    examples = [example for _, example in pairs]

    print(f'Finished generating examples for {region.name}:{region.start}-{region.end}.')
    return region.name, positions, examples, None
Esempio n. 4
0
def generate_train_data(args):
    """
    Generates train data for the region provided through arguments.

    Parameters
    ----------
    args : tuple
        A ``(reads_path, truth_genome_path, ref, region)`` tuple, unpacked in
        that order:

        reads_path : path to the aligned reads file
        truth_genome_path : path to the truth genome
        ref : reference sequence
        region : region for which data is required (must expose ``name``,
            ``start`` and ``end``)

    Returns
    -------
    None
        When no alignment survives filtering.
    tuple
        ``(region_name, positions, examples, labels)`` otherwise:

        region_name : region name
        positions : positions corresponding to the provided region
        examples : examples corresponding to the provided region
        labels : labels corresponding to the provided region

    Raises
    ------
    KeyError
        If a window contains a position ``p`` with ``p[1] == 0`` that has no
        label and is not marked as unknown.
    """

    reads_path, truth_genome_path, ref, region = args

    aligns = get_aligns(truth_genome_path, region)
    filtered_aligns = filter_aligns(aligns)

    print(f'>> finished generating labels for {region.name}:{region.start}-{region.end}')

    if not filtered_aligns:
        print('>> no alignments')
        return None

    positions = []
    examples = []
    labels = []

    # Same call with the same argument for every label, so compute it once.
    unknown_label = Coder.encode(Coder.UNKNOWN)

    for align in filtered_aligns:
        position_label_dict = {}
        positions_with_unknown_base = set()

        pos, lbls = get_postions_and_labels(align, ref, region)
        for position, label in zip(pos, lbls):
            if label == unknown_label:
                positions_with_unknown_base.add(position)
            else:
                position_label_dict[position] = label

        if not position_label_dict:
            # No labelled position in this alignment: there is no span to
            # request features for, and indexing an empty list below would
            # raise IndexError and abort the whole region.
            continue

        # Only the extreme labelled positions are needed, so a full sort is
        # unnecessary. Keys are indexable positions whose first item is the
        # coordinate used in the query string.
        first = min(position_label_dict)
        last = max(position_label_dict)
        region_string = f'{region.name}:{first[0] + 1}-{last[0]}'
        result = gen.generate_features(reads_path, str(ref), region_string)

        for P, X in zip(*result):
            Y = []

            for p in P:
                assert is_in_region(p[0], filtered_aligns)

                if p in positions_with_unknown_base:
                    # A window touching an unknown base is dropped entirely.
                    break

                if p in position_label_dict:
                    Y.append(position_label_dict[p])
                elif p[1] != 0:
                    # Unlabelled position with a non-zero second component
                    # is labelled as a gap.
                    Y.append(Coder.encode(Coder.GAP))
                else:
                    raise KeyError(f'error: No label mapping for position {p}!')
            else:
                # Reached only when the inner loop did not break.
                positions.append(P)
                examples.append(X)
                labels.append(Y)

    print(f'>> finished generating examples for {region.name}:{region.start}-{region.end}')
    return region.name, positions, examples, labels
Esempio n. 5
0
import gmplot
from gmplot import GoogleMapPlotter as gmp
import gen
from sklearn.externals import joblib
from collections import Counter
from sklearn.preprocessing import minmax_scale

# Load the previously trained classifier from disk.
# NOTE(review): joblib.load unpickles the file -- only load a trusted
# 'classifier.pkl'.
clf = joblib.load('classifier.pkl')

# Video files to classify, and the per-camera predictions collected below.
cameras = ['116.avi', '117.avi', '118.avi']
traffic = []

for c in cameras:
    # Extract the two features for this video and classify them.
    count, density = gen.generate_features(c)
    # NOTE(review): scikit-learn estimators expect a 2-D X of shape
    # (n_samples, n_features). If clf is one and count/density are scalars,
    # this probably needs [[count, density]] -- confirm against the model.
    traffic_stat = clf.predict([count, density])
    # Class 2 is reported as high, class 1 as medium, anything else as low.
    if traffic_stat == 2:
        print(c, ' : High traffic')
    elif traffic_stat == 1:
        print(c, ' : Medium traffic')
    else:
        print(c, ' : Low traffic')
    traffic.append(traffic_stat)

# Tally how many cameras fell into each class. This rebinds `count`, which
# held a feature value inside the loop above.
count = [traffic.count(0), traffic.count(1), traffic.count(2)]
# Most frequent class; on a tie the lowest class index wins because
# list.index returns the first match.
status = count.index(max(count))

# Marker colour per class, indexed by `status` when plotting below.
color = ['green', 'yellow', 'red']
gmap = gmp.from_geocode(
    "Veermata Jijabai Technological Institute, Mumbai, Maharashtra")
gmap.scatter([19.018892, 19.024714], [72.855786, 72.856964],
             color[status],