Python AlignmentPropertyMatrix Examples

Programming Language: Python

Namespace/Package Name: emase

Examples at hotexamples.com: 5

Python AlignmentPropertyMatrix - 5 examples found. These are the top-rated real-world Python examples of emase.AlignmentPropertyMatrix, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

count(1)

finalize(1)

save(1)

set_value(1)

Example #1

Show file

File: util.py Project: churchill-lab/bam2ec

def ec2emase(file_in, file_out, target_file=None):
    """
    Parse an equivalence class (EC) file and fill an alignment property matrix from it.

    :param file_in: Input file name, handed to ``ec_file.parse``.
    :param file_out: Output file name (not used by the code shown here).
    :param target_file: Optional target file (not used by the code shown here).
    :return: None
    :raises Exception: any error raised while filling the matrix is reported through
                       ``_show_error`` and then re-raised unchanged.
    """
    ec = ec_file.parse(file_in)
    num_haplotypes = len(ec._haplotypes_list)
    new_shape = (len(ec._targets_list), num_haplotypes, len(ec._ec_list))

    LOG.info('Creating APM...')
    LOG.debug('Shape={}'.format(new_shape))

    apm = APM(shape=new_shape, haplotype_names=ec._haplotypes_list, locus_names=ec._targets_list, read_names=ec._ec_list)

    LOG.debug('ec._haplotypes_list={}'.format(str(ec._haplotypes_list)))
    LOG.debug('ec._targets_list[0:10]={}'.format(str(ec._targets_list[0:10])))
    LOG.debug('ec._ec_list[0:10]={}'.format(str(ec._ec_list[0:10])))

    # counts -> the number of times this equivalence class has appeared
    apm.count = ec._ec_counts_list

    try:
        for alignment in ec._alignments:
            ec_index = alignment[0]
            target_index = alignment[1]
            temp_bits = alignment[2]

            # no haplotype flagged for this (ec, target) pair, nothing to store
            if temp_bits == 0:
                continue

            bits = int_to_list(temp_bits, num_haplotypes)
            for i, bit in enumerate(bits):
                if bit:
                    # lid, hid, rid, value
                    apm.set_value(target_index, i, ec_index, 1)
    except Exception:
        _show_error()
        # a bare raise keeps the original traceback ("raise e" would replace it on Python 2)
        raise

Example #2

Show file

File: util.py Project: churchill-lab/bam2ec

def _count_equivalence_class(ec, ec_idx, target_ids):
    """Record one more occurrence of the equivalence class formed by target_ids."""
    ec_key = ','.join(sorted(target_ids))

    if ec_key in ec:
        ec[ec_key] += 1
    else:
        ec[ec_key] = 1
        # remember the insertion order of the new key
        ec_idx[ec_key] = len(ec_idx)


def _split_ec_key(ec_key, target_idx_to_main_target):
    """Return the list of tids stored in ec_key and the set of main targets they map to."""
    arr_target_idx = ec_key.split(",")

    temp_main_targets = set()
    for idx in arr_target_idx:
        temp_main_targets.add(target_idx_to_main_target[idx])

    return arr_target_idx, temp_main_targets


def convert(file_in, file_out, target_file=None, emase=False):
    """
    Group the alignments of a BAM/SAM file into equivalence classes and write them out.

    Consecutive alignments that share a read name are collapsed into one equivalence class,
    keyed by the sorted, comma separated tids the read aligned to.  The result is saved either
    as an EMASE alignment property matrix or as a binary file.

    :param file_in: Input BAM/SAM file.
    :param file_out: Output file name.  An existing file of that name is removed first.
    :param target_file: The target file is a list of main targets that will be used as main targets,
                        not to limit the main targets.  Useful for comparison purposes between BAM files.
    :param emase: Emase output or normal.
    :return: None
    """
    LOG.info('Input File: {}'.format(file_in))
    LOG.info('Output File: {}'.format(file_out))

    if target_file:
        LOG.info('Target File: {}'.format(target_file))

    if emase:
        LOG.info('Emase format requested')

    main_targets = OrderedDict()

    if target_file:
        main_targets = parse_target_file(target_file)
        if len(main_targets) == 0:
            LOG.error("Unable to parse target file")
            sys.exit(-1)

    # ec = equivalence class
    #      the KEY is a comma separated string of tids
    #      the VALUE is the number of times this equivalence class has appeared
    ec = OrderedDict()

    # ec_idx = lookup to ec
    #          the KEY is a comma separated string of tids
    #          the VALUE is a number specifying the insertion order of the KEY value in ec
    ec_idx = {}

    # all the haplotypes
    haplotypes = set()

    # a lookup of tids to main_targets (Ensembl IDs)
    target_idx_to_main_target = {}

    # unique number of tids encountered and the count
    unique_tids = {}

    # unique reads; the KEY is the read name, the VALUE the number of its alignments
    unique_reads = {}

    # times the same read hit the same target more than once
    same_read_target_counter = 0

    # try BAM first and fall back to SAM when that fails for any reason
    try:
        sam_file = pysam.Samfile(file_in, 'rb')
        if len(sam_file.header) == 0:
            raise Exception("BAM File has no header information")
    except Exception:
        sam_file = pysam.Samfile(file_in, 'r')
        if len(sam_file.header) == 0:
            raise Exception("SAM File has no header information")

    try:
        line_no = 0
        read_id = None
        target_ids = []

        for alignment in sam_file:
            line_no += 1

            # reference_sequence_name = Column 3 from file, the Reference NAME (EnsemblID_Haplotype)
            # tid = the target id, which is 0 or a positive integer mapping to entries
            #       within the sequence dictionary in the header section of a BAM file
            # main_target = the Ensembl id of the transcript

            # FLAG is a bit field: test the "unmapped" bit instead of comparing the whole value,
            # so unmapped records that carry other bits (paired, mate unmapped, ...) are skipped too
            if alignment.flag & 0x4:
                continue

            reference_sequence_name = sam_file.getrname(alignment.tid)
            tid = str(alignment.tid)
            name_parts = reference_sequence_name.split('_')
            main_target = name_parts[0]

            unique_tids[tid] = unique_tids.get(tid, 0) + 1

            if main_target not in main_targets:
                if target_file:
                    LOG.error("Unexpected target found in BAM file: {}".format(main_target))
                    sys.exit(-1)
                main_targets[main_target] = len(main_targets)

            target_idx_to_main_target[tid] = main_target

            try:
                haplotypes.add(name_parts[1])
            except IndexError:
                # the reference name has no '_<haplotype>' suffix
                LOG.info('Unable to parse Haplotype from {}'.format(reference_sequence_name))
                return

            # qname = Column 1 from file, the Query template NAME
            qname = alignment.qname

            # count against the read of THIS alignment; counting against the previous read id
            # would never register a final read that has a single alignment
            unique_reads[qname] = unique_reads.get(qname, 0) + 1

            if read_id is None:
                read_id = qname

            if read_id != qname:
                # a new read starts, so the targets collected so far form a finished class
                _count_equivalence_class(ec, ec_idx, target_ids)

                read_id = qname
                target_ids = [tid]
            elif tid not in target_ids:
                target_ids.append(tid)
            else:
                same_read_target_counter += 1

            if line_no % 1000000 == 0:
                LOG.info("{0:,} alignments processed, with {1:,} equivalence classes".format(line_no, len(ec)))

        LOG.info("{0:,} alignments processed, with {1:,} equivalence classes".format(line_no, len(ec)))

        # flush the targets of the last read; the list is empty only when the input
        # contained no mapped alignment at all, in which case there is nothing to record
        if target_ids:
            _count_equivalence_class(ec, ec_idx, target_ids)

        haplotypes = sorted(haplotypes)

        LOG.info("# Unique Reads: {:,}".format(len(unique_reads)))
        LOG.info("# Reads/Target Duplications: {:,}".format(same_read_target_counter))
        LOG.info("# Main Targets: {:,}".format(len(main_targets)))
        LOG.info("# Haplotypes: {:,}".format(len(haplotypes)))
        LOG.info("# Unique Targets: {:,}".format(len(unique_tids)))
        LOG.info("# Equivalence Classes: {:,}".format(len(ec)))

        # best effort removal of a previous output file; a missing file is not an error
        try:
            os.remove(file_out)
        except OSError:
            pass

        if emase:
            try:
                LOG.info('Creating APM...')
                if LOG.isEnabledFor(VERBOSE_LEVELV_NUM):
                    LOG.verbose("HAPLOTYPES")
                    for h in haplotypes:
                        LOG.verbose(h)
                    LOG.verbose("MAIN TARGETS")
                    for m in main_targets:
                        LOG.verbose(m)

                new_shape = (len(main_targets), len(haplotypes), len(ec))

                ec_ids = list(range(len(ec)))

                LOG.debug('Shape={}'.format(new_shape))

                apm = APM(shape=new_shape, haplotype_names=haplotypes, locus_names=list(main_targets), read_names=ec_ids)

                # ec.values -> the number of times this equivalence class has appeared
                apm.count = list(ec.values())

                # k = comma separated string of tids
                for k in ec:
                    arr_target_idx, temp_main_targets = _split_ec_key(k, target_idx_to_main_target)

                    # loop through the targets and haplotypes to get the bits
                    for main_target in temp_main_targets:
                        # main_target is not an index, but a value like 'ENMUST..001'

                        for i, hap in enumerate(haplotypes):
                            read_transcript = '{}_{}'.format(main_target, hap)  # now 'ENMUST..001_A'
                            # get the numerical tid corresponding to read_transcript
                            read_transcript_idx = str(sam_file.gettid(read_transcript))

                            if read_transcript_idx in arr_target_idx:
                                LOG.debug("{}\t{}\t{}".format(ec_idx[k], main_targets[main_target], i))

                                # main_targets[main_target] = idx of main target
                                # i = the haplotype
                                # ec_idx[k] = index of ec
                                apm.set_value(main_targets[main_target], i, ec_idx[k], 1)

                LOG.info("Finalizing...")
                apm.finalize()
                apm.save(file_out, title='bam2ec')
            except Exception:
                # report and carry on, but let SystemExit / KeyboardInterrupt through
                _show_error()
        else:
            try:
                LOG.info("Generating BIN file...")

                # the with block closes the file even when writing fails half way
                with open(file_out, "wb") as f:
                    # version
                    f.write(pack('<i', 1))
                    LOG.verbose("1\t# VERSION")

                    # targets
                    LOG.verbose("{:,}\t# NUMBER OF TARGETS".format(len(main_targets)))
                    f.write(pack('<i', len(main_targets)))
                    for main_target in main_targets:
                        idx = main_targets[main_target]
                        LOG.verbose("{:,}\t{}\t# {:,}".format(len(main_target), main_target, idx))
                        f.write(pack('<i', len(main_target)))
                        f.write(pack('<{}s'.format(len(main_target)), main_target))

                    # haplotypes
                    LOG.verbose("{:,}\t# NUMBER OF HAPLOTYPES".format(len(haplotypes)))
                    f.write(pack('<i', len(haplotypes)))
                    for idx, hap in enumerate(haplotypes):
                        LOG.verbose("{:,}\t{}\t# {:,}".format(len(hap), hap, idx))
                        f.write(pack('<i', len(hap)))
                        f.write(pack('<{}s'.format(len(hap)), hap))

                    # equivalence classes
                    LOG.verbose("{:,}\t# NUMBER OF EQUIVALANCE CLASSES".format(len(ec)))
                    f.write(pack('<i', len(ec)))
                    for idx, k in enumerate(ec):
                        # ec[k] is the count
                        LOG.verbose("{:,}\t# {}\t{:,}".format(ec[k], k, idx))
                        f.write(pack('<i', ec[k]))

                    LOG.info("Determining mappings...")

                    # equivalence class mappings: one record per (ec, main target) pair,
                    # so the total has to be known before the records are written
                    counter = 0
                    for k in ec:
                        counter += len(_split_ec_key(k, target_idx_to_main_target)[1])

                    LOG.verbose("{:,}\t# NUMBER OF EQUIVALANCE CLASS MAPPINGS".format(counter))
                    f.write(pack('<i', counter))

                    for k in ec:
                        arr_target_idx, temp_main_targets = _split_ec_key(k, target_idx_to_main_target)

                        # loop through the haplotypes and targets to get the bits
                        for main_target in temp_main_targets:
                            # main_target is not an index, but a value like 'ENMUST..001'

                            bits = []

                            for hap in haplotypes:
                                read_transcript = '{}_{}'.format(main_target, hap)  # now 'ENMUST..001_A'
                                read_transcript_idx = str(sam_file.gettid(read_transcript))

                                if read_transcript_idx in arr_target_idx:
                                    bits.append(1)
                                else:
                                    bits.append(0)

                            LOG.verbose("{}\t{}\t{}\t# {}\t{}".format(ec_idx[k], main_targets[main_target], list_to_int(bits), main_target, bits))
                            f.write(pack('<i', ec_idx[k]))
                            f.write(pack('<i', main_targets[main_target]))
                            f.write(pack('<i', list_to_int(bits)))
            except Exception:
                # report and carry on, but let SystemExit / KeyboardInterrupt through
                _show_error()

        LOG.info("Done with converting BAM file!")
    finally:
        # the alignment file is still needed for gettid() while writing, so close it last
        sam_file.close()

Example #3

Show file

File: emasify.py Project: Anirudh8841/kallisto-align

def _read_int(f):
    """Read a single native C int from the open binary file f."""
    return np.fromfile(f, dtype=np.dtype('i'), count=1)[0]


def _read_names(f, how_many):
    """Read how_many length-prefixed byte strings from f and return them in file order."""
    names = []
    for _ in xrange(0, how_many):
        str_len = _read_int(f)
        # 'S<n>' is a fixed-width byte string; it replaces the deprecated 'a<n>' spelling
        names.append(np.fromfile(f, dtype=np.dtype('S' + str(str_len)), count=1)[0])
    return names


def emasify(binary_file_name, emase_file_name):
    """
    Convert a binary alignment file into an EMASE alignment property matrix file.

    The binary file starts with a version number.  Version 0 stores one entry per read,
    any other accepted version (1) stores equivalence classes together with their counts.
    Both variants end with alignments stored as (read or class index, target index, bits)
    triples.

    :param binary_file_name: Binary input file name.
    :param emase_file_name: Output file name, handed to the ``save`` method of the matrix.
    :return: None.  Nothing is written when the file version is unknown.
    :raises ValueError: if binary_file_name is empty.
    """

    if not binary_file_name:
        raise ValueError("empty file name, cannot load")

    print("Binary File: {0}".format(binary_file_name))

    # everything is read up front so the file is closed before the matrix is built
    with open(binary_file_name, 'rb') as f:
        file_version = _read_int(f)

        if file_version == 0:
            print("Version: 0, Reads")
        elif file_version == 1:
            print("Version: 1, Equivalence Class")
        else:
            print("Unknown version, exiting")
            # actually stop here instead of parsing an unknown layout as version 1
            return

        # TARGETS

        num_targets = _read_int(f)
        print("Target Count: {0}".format(num_targets))
        target_ids = _read_names(f, num_targets)

        # HAPLOTYPES

        num_haplotypes = _read_int(f)
        print("Haplotype Count: {0}".format(num_haplotypes))
        haplotype_ids = _read_names(f, num_haplotypes)

        counts = None

        if file_version == 0:

            # READS

            num_reads = _read_int(f)
            print("Read Count: {0}".format(num_reads))
            read_names = _read_names(f, num_reads)
            num_columns = len(read_names)
        else:

            # EQUIVALENCE CLASSES

            num_ec = _read_int(f)
            print("Equivalance Class Count: {0}".format(num_ec))

            # equivalence classes have no names, their position is used instead
            read_names = list(range(num_ec))
            counts = np.fromfile(f, dtype=np.dtype('i'), count=num_ec)
            num_columns = len(counts)

        # ALIGNMENTS

        num_alignments = _read_int(f)
        print("Alignment Count: {0}".format(num_alignments))

        alignments = np.fromfile(f,
                                 dtype=np.dtype('i'),
                                 count=num_alignments * 3)

    print('Creating APM...')
    new_shape = (len(target_ids), len(haplotype_ids), num_columns)
    aln_mat_kallisto = APM(shape=new_shape,
                           haplotype_names=haplotype_ids,
                           locus_names=target_ids,
                           read_names=read_names)

    if counts is not None:
        aln_mat_kallisto.count = counts

    print('Parsing alignments...')
    widgets = [Bar('>'), ' ', ETA(), ' ', Percentage()]
    pbar = ProgressBar(widgets=widgets, maxval=num_alignments * 3).start()

    # the alignments are a flat array of (rid, lid, bits) triples
    for i in xrange(0, num_alignments * 3, 3):
        rid = alignments[i]
        lid = alignments[i + 1]
        temp_bits = alignments[i + 2]

        pbar.update(i)
        if temp_bits == 0:
            continue

        bits = simple_from_one(temp_bits, num_haplotypes)
        for hid, b in enumerate(bits):
            if b:
                aln_mat_kallisto.set_value(lid, hid, rid, 1)

    pbar.finish()

    print("Finalizing...")
    aln_mat_kallisto.finalize()
    aln_mat_kallisto.save(emase_file_name, title='KALLISTOALIGN')

    print("DONE")

Example #4

Show file

File: util.py Project: churchill-lab/bam2ec

def bin2emase(binary_file_name, emase_file_name):
    """
    Convert a version 1 (equivalence class) binary file into an EMASE file.

    The file is read in this order: version, target names, haplotype names, equivalence
    class counts, and alignments stored as (equivalence class index, target index, bits)
    triples, where ``int_to_list`` expands bits into one flag per haplotype.

    :param binary_file_name: Binary input file name.
    :param emase_file_name: Output file name, handed to ``apm.save``.
    :return: None
    :raises ValueError: if binary_file_name is empty.
    :raises SystemExit: if the file version is not 1.
    """
    try:
        if not binary_file_name:
            raise ValueError("empty file name, cannot load")

        LOG.info("Binary File: {0}".format(binary_file_name))

        int_type = np.dtype('i')

        # everything is read up front so the file is closed before the matrix is built
        with open(binary_file_name, 'rb') as f:
            file_version = np.fromfile(f, dtype=int_type, count=1)[0]

            if file_version == 0:
                LOG.info("Version: 0, Reads, exiting")
                sys.exit(-1)
            elif file_version == 1:
                LOG.info("Version: 1, Equivalence Class")
            else:
                LOG.info("Unknown version, exiting")
                sys.exit(-1)

            # TARGETS

            target_ids = []

            num_targets = np.fromfile(f, dtype=int_type, count=1)[0]
            LOG.info("Target Count: {0:,}".format(num_targets))

            for i in xrange(0, num_targets):
                str_len = np.fromfile(f, dtype=int_type, count=1)[0]
                # 'S<n>' is a fixed-width byte string; it replaces the deprecated 'a<n>' spelling
                target = np.fromfile(f, dtype=np.dtype('S' + str(str_len)), count=1)[0]
                target_ids.append(target)

                LOG.verbose("{}\t{}".format(i, target))

            # HAPLOTYPES

            haplotype_ids = []

            num_haplotypes = np.fromfile(f, dtype=int_type, count=1)[0]
            LOG.info("Haplotype Count: {0:,}".format(num_haplotypes))

            for i in xrange(0, num_haplotypes):
                str_len = np.fromfile(f, dtype=int_type, count=1)[0]
                haplotype = np.fromfile(f, dtype=np.dtype('S' + str(str_len)), count=1)[0]
                haplotype_ids.append(haplotype)

                LOG.verbose("{}\t{}".format(i, haplotype))

            # EQUIVALENCE CLASSES

            num_ec = np.fromfile(f, dtype=int_type, count=1)[0]
            LOG.info("Equivalance Class Count: {0:,}".format(num_ec))

            counts = np.fromfile(f, dtype=int_type, count=num_ec)

            # ALIGNMENTS

            num_alignments = np.fromfile(f, dtype=int_type, count=1)[0]
            LOG.info("Alignment Count: {0:,}".format(num_alignments))

            alignments = np.fromfile(f, dtype=int_type, count=num_alignments*3)

        new_shape = (num_targets, num_haplotypes, num_ec)
        LOG.info('Creating APM...')

        # equivalence classes have no names, their position is used instead
        ec_ids = list(range(num_ec))

        LOG.debug('Shape={}'.format(new_shape))

        apm = APM(shape=new_shape, haplotype_names=haplotype_ids, locus_names=target_ids, read_names=ec_ids)

        # counts -> the number of times this equivalence class has appeared
        apm.count = counts

        # the alignments are a flat array of (rid, lid, bits) triples
        for offset in xrange(0, num_alignments*3, 3):
            rid = alignments[offset]
            lid = alignments[offset+1]
            temp_bits = alignments[offset+2]

            if temp_bits == 0:
                continue

            bits = int_to_list(temp_bits, num_haplotypes)
            for hid, bit in enumerate(bits):
                if bit:
                    # lid, hid, rid, value: the locus (target) index comes first and the
                    # equivalence class index last, matching the (targets, haplotypes, ec) shape
                    apm.set_value(lid, hid, rid, 1)

        LOG.info("Finalizing...")
        apm.finalize()
        apm.save(emase_file_name, title='bam2ec')
    except Exception:
        # NOTE(review): the excerpt this block was taken from stopped before the handler of
        # the enclosing try; this one applies the report-and-re-raise policy the function
        # already used around set_value -- confirm against the full source.
        _show_error()
        # a bare raise keeps the original traceback
        raise

Example #5

Show file

File: emasify.py Project: churchill-lab/kallisto-align

def _read_int(f):
    """Read a single native C int from the open binary file f."""
    return np.fromfile(f, dtype=np.dtype('i'), count=1)[0]


def _read_names(f, how_many):
    """Read how_many length-prefixed byte strings from f and return them in file order."""
    names = []
    for _ in xrange(0, how_many):
        str_len = _read_int(f)
        # 'S<n>' is a fixed-width byte string; it replaces the deprecated 'a<n>' spelling
        names.append(np.fromfile(f, dtype=np.dtype('S' + str(str_len)), count=1)[0])
    return names


def emasify(binary_file_name, emase_file_name):
    """
    Convert a binary alignment file into an EMASE alignment property matrix file.

    The binary file starts with a version number.  Version 0 stores one entry per read,
    any other accepted version (1) stores equivalence classes together with their counts.
    Both variants end with alignments stored as (read or class index, target index, bits)
    triples.

    :param binary_file_name: Binary input file name.
    :param emase_file_name: Output file name, handed to the ``save`` method of the matrix.
    :return: None.  Nothing is written when the file version is unknown.
    :raises ValueError: if binary_file_name is empty.
    """

    if not binary_file_name:
        raise ValueError("empty file name, cannot load")

    print("Binary File: {0}".format(binary_file_name))

    # everything is read up front so the file is closed before the matrix is built
    with open(binary_file_name, 'rb') as f:
        file_version = _read_int(f)

        if file_version == 0:
            print("Version: 0, Reads")
        elif file_version == 1:
            print("Version: 1, Equivalence Class")
        else:
            print("Unknown version, exiting")
            # actually stop here instead of parsing an unknown layout as version 1
            return

        # TARGETS

        num_targets = _read_int(f)
        print("Target Count: {0}".format(num_targets))
        target_ids = _read_names(f, num_targets)

        # HAPLOTYPES

        num_haplotypes = _read_int(f)
        print("Haplotype Count: {0}".format(num_haplotypes))
        haplotype_ids = _read_names(f, num_haplotypes)

        counts = None

        if file_version == 0:

            # READS

            num_reads = _read_int(f)
            print("Read Count: {0}".format(num_reads))
            read_names = _read_names(f, num_reads)
            num_columns = len(read_names)
        else:

            # EQUIVALENCE CLASSES

            num_ec = _read_int(f)
            print("Equivalance Class Count: {0}".format(num_ec))

            # equivalence classes have no names, their position is used instead
            read_names = list(range(num_ec))
            counts = np.fromfile(f, dtype=np.dtype('i'), count=num_ec)
            num_columns = len(counts)

        # ALIGNMENTS

        num_alignments = _read_int(f)
        print("Alignment Count: {0}".format(num_alignments))

        alignments = np.fromfile(f, dtype=np.dtype('i'), count=num_alignments*3)

    print('Creating APM...')
    new_shape = (len(target_ids), len(haplotype_ids), num_columns)
    aln_mat_kallisto = APM(shape=new_shape, haplotype_names=haplotype_ids, locus_names=target_ids, read_names=read_names)

    if counts is not None:
        aln_mat_kallisto.count = counts

    print('Parsing alignments...')
    widgets = [Bar('>'), ' ', ETA(), ' ', Percentage()]
    pbar = ProgressBar(widgets=widgets, maxval=num_alignments*3).start()

    # the alignments are a flat array of (rid, lid, bits) triples
    for i in xrange(0, num_alignments*3, 3):
        rid = alignments[i]
        lid = alignments[i+1]
        temp_bits = alignments[i+2]

        pbar.update(i)
        if temp_bits == 0:
            continue

        bits = simple_from_one(temp_bits, num_haplotypes)
        for hid, b in enumerate(bits):
            if b:
                aln_mat_kallisto.set_value(lid, hid, rid, 1)

    pbar.finish()

    print("Finalizing...")
    aln_mat_kallisto.finalize()
    aln_mat_kallisto.save(emase_file_name, title='KALLISTOALIGN')

    print("DONE")