Ejemplo n.º 1
0
    def test_shrink_gaps(self):
        """
        Check that Map.shrink_gaps gives every gap record the requested
        size and leaves neighbouring records adjacent to each other.
        """
        # a single reference sequence in which each fragment record is
        # followed by a gap record; all lengths and strands are random
        record_names = []
        for i in xrange(10):
            record_names.extend(('fr_{}'.format(i), 'GAP'))

        test_map = Map()
        position = 0
        for name in record_names:
            length = random.randrange(100)
            strand = random.choice(('+', '-'))
            test_map.add_record(
                Map.Record(name, length, 0, length, strand, 'ref_1',
                           position, position + length))
            position += length

        gap_size = 50
        test_map.shrink_gaps(gap_size)

        for chromosome in test_map.chromosomes():
            # every gap must have exactly the specified size, both in
            # its own coordinates and on the reference
            gaps = [record for record in test_map.fragments(chromosome)
                    if record.fr_name == 'GAP']
            for gap in gaps:
                self.assertEqual(gap.fr_length, gap_size)
                self.assertEqual(gap.fr_end - gap.fr_start, gap_size)
                self.assertEqual(gap.ref_end - gap.ref_start, gap_size)

            # each record must start where the previous one ends
            records = list(test_map.fragments(chromosome))
            for previous, current in zip(records[:-1], records[1:]):
                self.assertEqual(previous.ref_end, current.ref_start)
Ejemplo n.º 2
0
    def test_write(self):
        """
        Test the writing method of the fragment simulator.

        The simulator output is written to three temporary files (the
        fragment sequences, the chromosome sequences and the fragment
        map). The files are removed when the checks are finished, even
        if one of the checks fails.
        """
        temp_paths = []
        for _ in range(3):
            # mkstemp returns an open OS-level file descriptor together
            # with the path; only the path is needed here, so the
            # descriptor is closed at once instead of being leaked
            handle, path = tempfile.mkstemp()
            os.close(handle)
            temp_paths.append(path)
        self.__fragments, self.__chromosomes, self.__map = temp_paths

        try:
            self.__simulator.write(self.__map, self.__fragments,
                                   self.__chromosomes)

            # check if the correct number of fragment and chromosome
            # sequences was written
            fragment_fasta = pyfaidx.Fasta(self.__fragments)
            self.assertEqual(len(fragment_fasta.keys()),
                             self.__fragment_number + self.__unplaced_number)
            chromosome_fasta = pyfaidx.Fasta(self.__chromosomes)
            self.assertEqual(len(chromosome_fasta.keys()),
                             self.__chromosome_number)

            # check if a correct fragment map was written
            test_map = Map()
            test_map.read(self.__map)
        finally:
            # the '.fai' files are presumably the indices created next
            # to the FASTA files by pyfaidx; they may be missing if the
            # test failed early, hence the existence check
            for path in (self.__fragments,
                         self.__fragments + '.fai',
                         self.__chromosomes,
                         self.__chromosomes + '.fai',
                         self.__map):
                if os.path.exists(path):
                    os.unlink(path)
Ejemplo n.º 3
0
 def test_summary(self):
     """
     Check that the summary of a fragment map read from the test
     file is returned as a dictionary.
     """
     loaded_map = Map()
     loaded_map.read(self.__test_line)
     summary = loaded_map.summary()
     self.assertIsInstance(summary, dict)
Ejemplo n.º 4
0
class Transfer(object):
    """
    Base routines for transferring positions from fragments to the
    chromosomes described by a fragment map.
    """

    def __init__(self, fragment_map):
        """
        Create a Transfer object.

        :param fragment_map: the source of the fragment map; it is
            passed to Map.read as is (presumably the name of a
            fragment map file -- confirm against Map.read)
        """
        self.__fragment_map = loaded_map = Map()
        loaded_map.read(fragment_map)

    def find_fragment(self, fragment):
        """
        Look up a fragment in the fragment map by its name.

        The chromosomes of the map are scanned one after another and
        the first record bearing the requested name is returned.

        :param fragment: a fragment name
        :type fragment: str
        :return: the fragment map record of the specified fragment or
            None if the map contains no such fragment
        :rtype: Map.Record
        """
        fragment_map = self.__fragment_map
        matching_records = (
            record
            for chromosome in fragment_map.chromosomes()
            for record in fragment_map.fragments(chromosome)
            if record.fr_name == fragment
        )
        return next(matching_records, None)

    def coordinate(self, fragment, pos):
        """
        Convert a position on a fragment to a position on the
        chromosome the fragment is placed on by the fragment map.

        For a fragment on the '+' strand the position is counted
        forward from the reference start of the fragment; for any
        other strand value it is counted backward from its reference
        end.

        :param fragment: a fragment name
        :param pos: a position on a fragment (zero-based)
        :type fragment: str
        :type pos: int
        :return: a tuple of the chromosome name and a position on it,
            or None if the fragment is absent from the fragment map
        :rtype: tuple
        """
        record = self.find_fragment(fragment)
        if record is None:
            # the fragment is not a part of the assembly, so there is
            # nothing to convert
            return None

        if record.fr_strand == '+':
            return record.ref_chr, record.ref_start + pos
        return record.ref_chr, record.ref_end - pos
Ejemplo n.º 5
0
 def test_chromosomes(self):
     """
     Check that iterating over the chromosomes of the map read from
     the test file yields 'chr1' only.
     """
     loaded_map = Map()
     loaded_map.read(self.__test_line)
     self.assertEqual(list(loaded_map.chromosomes()), ['chr1'])
Ejemplo n.º 6
0
    def __init__(self, fragment_map):
        """
        Create a Transfer object.

        :param fragment_map: the source of the fragment map; it is
            passed to Map.read as is (presumably the name of a
            fragment map file -- confirm against Map.read)
        """
        self.__fragment_map = loaded_map = Map()
        loaded_map.read(fragment_map)
Ejemplo n.º 7
0
 def test_add_record(self):
     """
     Add a single record to an empty fragment map; the test passes
     if no exception is raised.
     """
     fragment_map = Map()
     record_fields = {
         'fr_name': 'fragment1',
         'fr_length': 180,
         'fr_start': 0,
         'fr_end': 180,
         'fr_strand': '+',
         'ref_chr': 'chr1',
         'ref_start': 5000,
         'ref_end': 5180,
     }
     fragment_map.add_record(Map.Record(**record_fields))
Ejemplo n.º 8
0
    def test_blast(self):
        """
        Build a fragment map from BLASTN alignments with the blast
        method and compare it with the original map; then check that
        a fragment whose length is unknown is reported as an error.
        """
        lengths_source = SeqLengths(self.__fragment_file)
        converter = AlignmentToMap(self.__gap_size,
                                   lengths_source.lengths())
        with open(self.__alignment_file) as alignment_file:
            alignments = BlastTab(alignment_file)
            # only the first item of the blast result, the fragment
            # map, is examined here
            produced_map = converter.blast(alignments, 1.2)[0]
            expected_map = Map()
            expected_map.read(self.__map_file)

            # compare the obtained fragment map with the original one,
            # record by record within each chromosome
            for chromosome in expected_map.chromosomes():
                record_pairs = izip(expected_map.fragments(chromosome),
                                    produced_map.fragments(chromosome))
                for expected, produced in record_pairs:
                    self.assertEqual(expected, produced)

        # now test against the situation when the alignments contain a
        # fragment whose length is missing
        with open(self.__alignment_file) as alignment_file:
            alignments = BlastTab(alignment_file)
            incomplete_lengths = lengths_source.lengths()
            # drop the fragment whose name comes first alphabetically
            first_name = sorted(incomplete_lengths)[0]
            del incomplete_lengths[first_name]
            converter = AlignmentToMap(self.__gap_size,
                                       incomplete_lengths,
                                       min_fragment_length=50)
            with self.assertRaises(AlignmentToMapError):
                converter.blast(alignments, 1.2)
Ejemplo n.º 9
0
 def test_convert2bed(self):
     """
     Convert the test fragment map to the BED format and make sure
     that the produced file can be read back.
     """
     source_map = Map()
     source_map.read(self.__test_line)
     source_map.convert2bed(self.__output_file)
     # iterate over all records of the produced file; the records
     # themselves are not inspected
     with open(self.__output_file) as produced_bed:
         for _ in Reader(produced_bed).records():
             pass
Ejemplo n.º 10
0
    def test_fragments(self):
        """
        Check the fragments iterator of a Map: the test map must hold
        a single record on 'chr1', and requesting an unknown
        chromosome must raise MapError.
        """
        loaded_map = Map()
        loaded_map.read(self.__test_line)
        chr1_records = [record for record in loaded_map.fragments('chr1')]
        self.assertEqual(len(chr1_records), 1)
        self.assertIsInstance(chr1_records[0], Map.Record)

        # a chromosome missing from the map must be reported once the
        # iterator is consumed
        with self.assertRaises(MapError):
            for _ in loaded_map.fragments('chrN'):
                pass
Ejemplo n.º 11
0
    def test_read(self):
        """
        Test the Map reading routine.

        The first record of 'chr1' read from the test file is compared
        field by field with the expected values; then every file from
        the list of incorrect files is checked to raise MapError.
        """
        fragment_map = Map()
        fragment_map.read(self.__test_line)
        # the next() built-in is used instead of the iterator's
        # .next() method, which exists in Python 2 only
        fragment = next(fragment_map.fragments('chr1'))
        self.assertEqual(fragment.fr_name, 'fragment1')
        self.assertEqual(fragment.fr_length, 180)
        self.assertEqual(fragment.fr_start, 0)
        self.assertEqual(fragment.fr_end, 180)
        self.assertEqual(fragment.fr_strand, '+')
        self.assertEqual(fragment.ref_chr, 'chr1')
        self.assertEqual(fragment.ref_start, 5000)
        self.assertEqual(fragment.ref_end, 5180)

        # check for incorrect input files
        for i in self.__incorrect_files:
            with self.assertRaises(MapError):
                fragment_map.read(os.path.join(self.__incorrect_file_dir, i))
Ejemplo n.º 12
0
    def test_write(self):
        """
        Test the Map writing routine.

        The map read from the test file is written to an output file
        and the two files are compared line by line. The output file
        is removed even if writing or the comparison fails.
        """
        fragment_map = Map()
        fragment_map.read(self.__test_line)

        output_filename = os.path.join('data', 'fragment_map',
                                       'fragment_map_output.txt')
        try:
            fragment_map.write(output_filename)

            # NOTE: izip stops at the end of the shorter file, so only
            # the lines present in both files are compared
            with open(output_filename) as output_file:
                with open(self.__test_line) as original_file:
                    for x, y in izip(original_file, output_file):
                        self.assertEqual(x, y)
        finally:
            # the file may be missing if the writing routine failed
            if os.path.exists(output_filename):
                os.unlink(output_filename)
Ejemplo n.º 13
0
def _transfer_records(transferrer, records, write):
    """
    Transfer records one by one and write the transferred ones.

    :param transferrer: an object whose feature method returns a
        transferred record or None if the record cannot be transferred
    :param records: an iterable of records to be transferred
    :param write: a callable that is given each transferred record
    :return: a tuple of the total number of processed records and the
        number of records that were transferred
    :rtype: tuple
    """
    total_count = transferred_count = 0
    for record in records:
        total_count += 1
        transferred_record = transferrer.feature(record)
        if transferred_record is not None:
            transferred_count += 1
            write(transferred_record)
    return total_count, transferred_count


def chromosomer():
    """
    The main function that is run if Chromosomer was launched. It
    defines a command-line parser that processes the arguments passed
    to the program, configures logging and runs the routine of the
    chosen subcommand.
    """
    parser = argparse.ArgumentParser(
        description='Reference-assisted chromosome assembly tool.')
    subparsers = parser.add_subparsers(dest='command')

    parser.add_argument('-v', '--version', action='version',
                        version='%(prog)s 0.1.4')

    parser.add_argument('-d', '--debug', action='store_true',
                        help='show debugging messages')

    # Parser for the 'chromosomer assemble' part that produces a FASTA
    # file of assembled chromosomes from the specified fragment map.
    assemble_parser = subparsers.add_parser(
        'assemble',
        help='get sequences of assembled chromosomes',
        description='Get the FASTA file of assembled chromosomes.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # required arguments for the 'assemble' routine
    assemble_parser.add_argument('map',
                                 help='a fragment map file')
    assemble_parser.add_argument('fragment_fasta',
                                 help='a FASTA file of fragment '
                                      'sequences to be assembled')
    assemble_parser.add_argument('output_fasta',
                                 help='the output FASTA file of the '
                                      'assembled chromosome sequences')

    # optional arguments for the 'assemble' routine
    assemble_parser.add_argument('-s', '--save_soft_mask',
                                 action='store_true',
                                 help='keep soft masking from the '
                                      'original fragment sequences')

    # Parser for the 'chromosomer fragmentmap' part that
    # produces a map of fragment positions on reference
    # chromosomes from BLAST alignments of the fragments to the
    # chromosomes.
    fragmentmap_parser = subparsers.add_parser(
        'fragmentmap',
        description='Construct a fragment map from fragment '
                    'alignments to reference chromosomes.',
        help='construct a fragment map from alignments',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # required arguments for the 'fragmentmap' routine
    fragmentmap_parser.add_argument(
        'alignment_file',
        help='a BLAST tabular file of fragment alignments to '
             'reference chromosomes'
    )
    fragmentmap_parser.add_argument(
        'gap_size', type=int,
        help='a size of a gap inserted between mapped fragments'
    )
    fragmentmap_parser.add_argument(
        'fragment_lengths',
        help='a file containing lengths of fragment sequences; it can '
             'be obtained using the \'chromosomer fastalength\' tool'
    )
    fragmentmap_parser.add_argument(
        'output_map',
        help='an output fragment map file name'
    )

    # optional arguments for the 'fragmentmap' routine
    fragmentmap_parser.add_argument(
        '-r', '--ratio_threshold', type=float, default=1.2,
        help='the least ratio of two greatest fragment alignment '
             'scores to determine the fragment placed to a reference '
             'genome'
    )

    fragmentmap_parser.add_argument(
        '-s', '--shrink_gaps', action='store_true',
        help='shrink large interfragment gaps to the specified size'
    )

    # Parser for the 'chromosomer fragmentmapstat' part that reports
    # statistics on a fragment map
    fragmentmapstat_parser = subparsers.add_parser(
        'fragmentmapstat',
        description='Show statistics on a fragment map.',
        help='show fragment map statistics'
    )

    # required arguments for the 'fragmentmapstat' routine
    fragmentmapstat_parser.add_argument('map',
                                        help='a fragment map file')
    fragmentmapstat_parser.add_argument('output',
                                        help='an output file of '
                                             'fragment map statistics')

    # Parser for the 'chromosomer fragmentmapbed' part that converts
    # a fragment map to the BED format
    fragmentmapbed_parser = subparsers.add_parser(
        'fragmentmapbed',
        description='Convert a fragment map to the BED format.',
        help='convert a fragment map to the BED format'
    )

    # required arguments for the 'fragmentmapbed' routine
    fragmentmapbed_parser.add_argument('map',
                                       help='a fragment map file')
    fragmentmapbed_parser.add_argument('output',
                                       help='an output BED file '
                                            'representing the '
                                            'fragment map')

    # Parser for the 'chromosomer transfer' part that transfers
    # genome feature annotation from fragments to their assembly
    transfer_parser = subparsers.add_parser(
        'transfer',
        description='Transfer annotated genomic features from '
                    'fragments to their assembly.',
        help='transfer annotated features from fragments to '
             'chromosomes',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # required arguments for the 'transfer' routine
    transfer_parser.add_argument('map',
                                 help='a fragment map file')
    transfer_parser.add_argument('annotation',
                                 help='a file of annotated genome '
                                      'features')
    transfer_parser.add_argument('output',
                                 help='an output file of the '
                                      'transfered annotation')

    # optional arguments for the 'transfer' routine
    transfer_parser.add_argument('-f', '--format', default='bed',
                                 choices=['bed', 'gff3', 'vcf'],
                                 help='the format of a file of '
                                      'annotated features (bed, '
                                      'gff3 or vcf)')

    # Parser for the 'chromosomer fastalength' part that calculates
    # lengths of sequences in the given FASTA file.
    fastalength_parser = subparsers.add_parser(
        'fastalength',
        description='Get lengths of sequences in the specified FASTA '
                    'file (required to build a fragment map).',
        help='get lengths of sequences from a FASTA file',
    )

    # required arguments for the 'fastalength' routine
    fastalength_parser.add_argument('fasta',
                                    help='a FASTA file which sequence '
                                         'lengths are to be obtained')
    fastalength_parser.add_argument('output',
                                    help='an output file of sequence '
                                         'lengths')

    # Parser for the 'chromosomer simulator' routine
    simulator_parser = subparsers.add_parser(
        'simulator',
        description='Simulate fragments and test assembly for '
                    'testing purposes.',
        help='fragment simulator for testing purposes'
    )

    # required arguments for the 'simulator' routine
    simulator_parser.add_argument('fr_num', type=int,
                                  help='the number of '
                                       'chromosome fragments')
    simulator_parser.add_argument('fr_len', type=int,
                                  help='the length of fragments')
    simulator_parser.add_argument('chr_num', type=int,
                                  help='the number of chromosomes')
    simulator_parser.add_argument('output_dir',
                                  help='the directory for output files')

    # optional arguments for the 'simulator' routine; note that
    # '--unplaced' has no default, so None is passed to the simulator
    # when the option is omitted
    simulator_parser.add_argument('-g', '--gap_size', type=int,
                                  default=2000,
                                  help='the size of gaps between '
                                       'fragments on a chromosome')
    simulator_parser.add_argument('-p', '--unplaced', type=int,
                                  help='the number of unplaced '
                                       'fragments')
    simulator_parser.add_argument('--prefix', default='',
                                  help='the prefix for output file '
                                       'names')

    # Parser for the 'chromosomer agp2map' routine
    agp2map_parser = subparsers.add_parser(
        'agp2map',
        description='Convert an AGP file to the fragment map format.',
        help='convert an AGP file to a fragment map'
    )

    # required arguments for the 'agp2map' routine
    agp2map_parser.add_argument('agp_file', help='an AGP file')
    agp2map_parser.add_argument('output_file', help='the output '
                                                    'fragment map '
                                                    'file')

    args = parser.parse_args()

    # In the debug mode only the logging level is changed; otherwise
    # a stream handler with a timestamped message format is attached
    # to the loggers and propagation to their ancestors is disabled.
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)
        logger.propagate = False
        formatter = logging.Formatter('%(asctime)-15s - %(message)s',
                                      '%Y-%m-%d %H:%M:%S')
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

        logging.basicConfig()
        cli_logger = logging.getLogger(__name__)
        cli_logger.propagate = False
        cli_logger.addHandler(ch)
        cli_logger.setLevel(logging.INFO)

    if args.command == 'assemble':
        fragment_map = Map()
        fragment_map.read(args.map)
        fragment_map.assemble(args.fragment_fasta,
                              args.output_fasta,
                              args.save_soft_mask)
    elif args.command == 'fragmentmap':
        fragment_lengths = read_fragment_lengths(args.fragment_lengths)
        map_creator = AlignmentToMap(args.gap_size, fragment_lengths)
        with open(args.alignment_file) as alignment_file:
            alignments = BlastTab(alignment_file)
            fragment_map, unlocalized, unplaced = map_creator.blast(
                alignments, args.ratio_threshold)
            if args.shrink_gaps:
                fragment_map.shrink_gaps(args.gap_size)
            fragment_map.write(args.output_map)
            # write unlocalized and unplaced fragments to files named
            # after the output map file
            output_prefix = splitext(args.output_map)[0]
            with open(output_prefix + '_unlocalized.txt',
                      'w') as unlocalized_file:
                for i in unlocalized:
                    unlocalized_file.write('{}\t{}\n'.format(*i))
            with open(output_prefix + '_unplaced.txt',
                      'w') as unplaced_file:
                for i in unplaced:
                    unplaced_file.write('{}\n'.format(i))
    elif args.command == 'transfer':
        total_count = transferred_count = 0
        if args.format == 'bed':
            transferrer = BedTransfer(args.map)
            with open(args.annotation) as input_file:
                with bioformats.bed.Writer(args.output) as output_file:
                    total_count, transferred_count = _transfer_records(
                        transferrer,
                        bioformats.bed.Reader(input_file).records(),
                        output_file.write)
        elif args.format == 'gff3':
            transferrer = Gff3Transfer(args.map)
            with open(args.annotation) as input_file:
                with bioformats.gff3.Writer(args.output) as output_file:
                    total_count, transferred_count = _transfer_records(
                        transferrer,
                        bioformats.gff3.Reader(input_file).records(),
                        output_file.write)
        elif args.format == 'vcf':
            transferrer = VcfTransfer(args.map)
            # both files are opened in 'with' blocks so that they are
            # closed even if the transfer fails halfway
            with open(args.annotation) as input_file:
                with open(args.output, 'w') as output_file:
                    reader = vcf.Reader(input_file)
                    writer = vcf.Writer(output_file, reader)
                    total_count, transferred_count = _transfer_records(
                        transferrer, reader, writer.write_record)
                    writer.close()

        logger.info('%d features transferred', transferred_count)
        logger.info('%d features skipped',
                    total_count - transferred_count)
    elif args.command == 'fastalength':
        seq_lengths = SeqLengths(args.fasta)
        with open(args.output, 'wt') as length_file:
            length_writer = csv.writer(length_file, delimiter='\t')
            for header, length in seq_lengths.lengths().iteritems():
                length_writer.writerow((header, length, ))
    elif args.command == 'simulator':
        fr_simulator = Simulator(args.fr_len, args.fr_num,
                                 args.chr_num, args.unplaced,
                                 args.gap_size)
        map_file = os.path.join(args.output_dir,
                                args.prefix + 'map.txt')
        chr_file = os.path.join(args.output_dir,
                                args.prefix + 'chromosomes.fa')
        fr_file = os.path.join(args.output_dir, args.prefix +
                               'fragments.fa')
        fr_simulator.write(map_file, fr_file, chr_file)
    elif args.command == 'fragmentmapstat':
        fragment_map = Map()
        fragment_map.read(args.map)
        summary = fragment_map.summary()
        # one line per chromosome: its name followed by the values of
        # its summary entry
        template = '\t'.join(['{}'] * 4) + '\n'
        with open(args.output, 'w') as output_file:
            for chromosome in sorted(summary.keys()):
                output_file.write(template.format(chromosome,
                                                  *summary[chromosome]))
    elif args.command == 'fragmentmapbed':
        fragment_map = Map()
        fragment_map.read(args.map)
        fragment_map.convert2bed(args.output)
    elif args.command == 'agp2map':
        agp2map(args.agp_file, args.output_file)
Ejemplo n.º 14
0
    def test_assemble(self):
        """
        Assemble chromosomes from artificial fragments and compare the
        result with the expected sequences; then check that a fragment
        missing from the FASTA file of fragment sequences is reported
        as an error.
        """
        gap_size = 10
        fragment_length = 10

        # fragment sequences: each two-letter pattern repeated five
        # times
        fragment_pattern = ['AC', 'AG', 'CT', 'CG', 'AT']
        fragments = {}
        for number, pattern in enumerate(fragment_pattern, 1):
            fragments['fragment{}'.format(number)] = pattern * 5

        # chromosome layout: fragment numbers in the order of their
        # placement, a negative number meaning the reverse orientation
        chromosome_content = {'chr1': [1, -2, 3], 'chr2': [-4, 5]}

        # expected chromosome sequences: every fragment, reverse
        # complemented if required, is followed by a gap of N's
        complement = string.maketrans('ATCGatcgNnXx', 'TAGCtagcNnXx')
        chromosomes = {}
        for chrom_name, signed_numbers in chromosome_content.iteritems():
            pieces = []
            for number in signed_numbers:
                sequence = fragments['fragment{}'.format(abs(number))]
                if number < 0:
                    sequence = sequence[::-1].translate(complement)
                pieces.extend((sequence, 'N' * gap_size))
            chromosomes[chrom_name] = ''.join(pieces)

        # construct the fragment map matching the layout: a fragment
        # record and a gap record for every fragment
        fragment_map = Map()
        for chrom_name, signed_numbers in chromosome_content.iteritems():
            position = 0
            for number in signed_numbers:
                fragment_map.add_record(
                    Map.Record('fragment{}'.format(abs(number)),
                               fragment_length, 0, fragment_length,
                               '+' if number > 0 else '-',
                               chrom_name, position,
                               position + fragment_length))
                position += fragment_length
                fragment_map.add_record(
                    Map.Record('GAP', gap_size, 0, gap_size, '+',
                               chrom_name, position,
                               position + gap_size))
                position += gap_size

        output_chromosomes = os.path.join(self.__output_dir,
                                          'temp_chromosomes.txt')
        output_fragments = os.path.join(self.__output_dir,
                                        'temp_fragments.txt')

        # write the fragment sequences to a FASTA file
        with Writer(output_fragments) as fasta_writer:
            for name, sequence in fragments.iteritems():
                fasta_writer.write(name, sequence)

        fragment_map.assemble(output_fragments, output_chromosomes)

        # read the assembled chromosomes from the written FASTA file
        # and compare them with the expected sequences
        assembled_chromosomes = pyfaidx.Fasta(output_chromosomes)
        for chrom_name, expected in chromosomes.iteritems():
            self.assertEqual(expected,
                             assembled_chromosomes[chrom_name][:].seq)

        # add a record of a fragment that is absent from the FASTA
        # file of fragment sequences and assemble again
        missing_record = Map.Record(fr_name='missing_fragment',
                                    fr_length=0,
                                    fr_start=0,
                                    fr_end=0,
                                    fr_strand='+',
                                    ref_chr='chr3',
                                    ref_start=0,
                                    ref_end=0)
        fragment_map.add_record(missing_record)
        with self.assertRaises(MapError):
            fragment_map.assemble(output_fragments, output_chromosomes)

        # remove the temporary FASTA files and their '.fai' companions
        for path in (output_chromosomes, output_chromosomes + '.fai',
                     output_fragments, output_fragments + '.fai'):
            os.unlink(path)
Ejemplo n.º 15
0
def chromosomer():
    """
    The main function that is run if Chromosomer was launched.

    It parses the command-line arguments (argparse reads them from
    ``sys.argv``), configures logging according to the ``--debug``
    flag and runs the routine that corresponds to the chosen
    subcommand.
    """
    args = _build_parser().parse_args()
    _configure_logging(args.debug)

    if args.command == 'assemble':
        fragment_map = Map()
        fragment_map.read(args.map)
        fragment_map.assemble(args.fragment_fasta, args.output_fasta,
                              args.save_soft_mask)
    elif args.command == 'fragmentmap':
        fragment_lengths = read_fragment_lengths(args.fragment_lengths)
        map_creator = AlignmentToMap(args.gap_size, fragment_lengths)
        # names of the auxiliary output files are derived from the
        # name of the output map file without its extension
        output_prefix = splitext(args.output_map)[0]
        with open(args.alignment_file) as alignment_file:
            alignments = BlastTab(alignment_file)
            fragment_map, unlocalized, unplaced = map_creator.blast(
                alignments, args.ratio_threshold)
            if args.shrink_gaps:
                fragment_map.shrink_gaps(args.gap_size)
            fragment_map.write(args.output_map)
            # write unlocalized and unplaced fragments
            with open(output_prefix + '_unlocalized.txt',
                      'w') as unlocalized_file:
                for i in unlocalized:
                    unlocalized_file.write('{}\t{}\n'.format(*i))
            with open(output_prefix + '_unplaced.txt',
                      'w') as unplaced_file:
                for i in unplaced:
                    unplaced_file.write('{}\n'.format(i))
    elif args.command == 'transfer':
        _run_transfer(args)
    elif args.command == 'fastalength':
        seq_lengths = SeqLengths(args.fasta)
        with open(args.output, 'wt') as length_file:
            length_writer = csv.writer(length_file, delimiter='\t')
            for header, length in seq_lengths.lengths().iteritems():
                length_writer.writerow((header, length))
    elif args.command == 'simulator':
        fr_simulator = Simulator(args.fr_len, args.fr_num, args.chr_num,
                                 args.unplaced, args.gap_size)
        map_file = os.path.join(args.output_dir, args.prefix + 'map.txt')
        chr_file = os.path.join(args.output_dir,
                                args.prefix + 'chromosomes.fa')
        fr_file = os.path.join(args.output_dir,
                               args.prefix + 'fragments.fa')
        fr_simulator.write(map_file, fr_file, chr_file)
    elif args.command == 'fragmentmapstat':
        fragment_map = Map()
        fragment_map.read(args.map)
        summary = fragment_map.summary()
        # one line per chromosome: its name followed by the summary
        # values unpacked into the remaining three columns
        template = '\t'.join(['{}'] * 4) + '\n'
        with open(args.output, 'w') as output_file:
            for chromosome in sorted(summary.keys()):
                output_file.write(
                    template.format(chromosome, *summary[chromosome]))
    elif args.command == 'fragmentmapbed':
        fragment_map = Map()
        fragment_map.read(args.map)
        fragment_map.convert2bed(args.output)
    elif args.command == 'agp2map':
        agp2map(args.agp_file, args.output_file)


def _build_parser():
    """
    Create the command-line parser of Chromosomer.

    :return: the argument parser with one subparser per routine; the
        name of the chosen routine is stored in the ``command``
        attribute of the parsed arguments
    """
    parser = argparse.ArgumentParser(
        description='Reference-assisted chromosome assembly tool.')
    subparsers = parser.add_subparsers(dest='command')

    parser.add_argument('-v', '--version', action='version',
                        version='%(prog)s 0.1.4')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='show debugging messages')

    # Parser for the 'chromosomer assemble' part that produces a FASTA
    # file of assembled chromosomes from the specified fragment map.
    assemble_parser = subparsers.add_parser(
        'assemble',
        help='get sequences of assembled chromosomes',
        description='Get the FASTA file of assembled chromosomes.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # required arguments for the 'assemble' routine
    assemble_parser.add_argument('map', help='a fragment map file')
    assemble_parser.add_argument(
        'fragment_fasta',
        help='a FASTA file of fragment sequences to be assembled')
    assemble_parser.add_argument(
        'output_fasta',
        help='the output FASTA file of the assembled chromosome '
             'sequences')

    # optional arguments for the 'assemble' routine
    assemble_parser.add_argument(
        '-s', '--save_soft_mask', action='store_true',
        help='keep soft masking from the original fragment sequences')

    # Parser for the 'chromosomer fragmentmap' part that
    # produces a map of fragment positions on reference
    # chromosomes from BLAST alignments of the fragments to the
    # chromosomes.
    fragmentmap_parser = subparsers.add_parser(
        'fragmentmap',
        description='Construct a fragment map from fragment '
                    'alignments to reference chromosomes.',
        help='construct a fragment map from alignments',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # required arguments for the 'fragmentmap' routine
    fragmentmap_parser.add_argument(
        'alignment_file',
        help='a BLAST tabular file of fragment alignments to '
             'reference chromosomes')
    fragmentmap_parser.add_argument(
        'gap_size', type=int,
        help='a size of a gap inserted between mapped fragments')
    fragmentmap_parser.add_argument(
        'fragment_lengths',
        help='a file containing lengths of fragment sequences; it can '
             'be obtained using the \'chromosomer fastalength\' tool')
    fragmentmap_parser.add_argument(
        'output_map', help='an output fragment map file name')

    # optional arguments for the 'fragmentmap' routine
    fragmentmap_parser.add_argument(
        '-r', '--ratio_threshold', type=float, default=1.2,
        help='the least ratio of two greatest fragment alignment '
             'scores to determine the fragment placed to a reference '
             'genome')
    fragmentmap_parser.add_argument(
        '-s', '--shrink_gaps', action='store_true',
        help='shrink large interfragment gaps to the specified size')

    # Parser for the 'chromosomer fragmentmapstat' part that reports
    # statistics on a fragment map
    fragmentmapstat_parser = subparsers.add_parser(
        'fragmentmapstat',
        description='Show statistics on a fragment map.',
        help='show fragment map statistics')

    # required arguments for the 'fragmentmapstat' routine
    fragmentmapstat_parser.add_argument('map',
                                        help='a fragment map file')
    fragmentmapstat_parser.add_argument(
        'output', help='an output file of fragment map statistics')

    # Parser for the 'chromosomer fragmentmapbed' part that converts
    # a fragment map to the BED format
    fragmentmapbed_parser = subparsers.add_parser(
        'fragmentmapbed',
        description='Convert a fragment map to the BED format.',
        help='convert a fragment map to the BED format')

    # required arguments for the 'fragmentmapbed' routine
    fragmentmapbed_parser.add_argument('map', help='a fragment map file')
    fragmentmapbed_parser.add_argument(
        'output',
        help='an output BED file representing the fragment map')

    # Parser for the 'chromosomer transfer' part that transfers
    # genome feature annotation from fragments to their assembly
    transfer_parser = subparsers.add_parser(
        'transfer',
        description='Transfer annotated genomic features from '
                    'fragments to their assembly.',
        help='transfer annotated features from fragments to '
             'chromosomes',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # required arguments for the 'transfer' routine
    transfer_parser.add_argument('map', help='a fragment map file')
    transfer_parser.add_argument(
        'annotation', help='a file of annotated genome features')
    transfer_parser.add_argument(
        'output', help='an output file of the transfered annotation')

    # optional arguments for the 'transfer' routine
    transfer_parser.add_argument(
        '-f', '--format', default='bed',
        choices=['bed', 'gff3', 'vcf'],
        help='the format of a file of annotated features (bed, '
             'gff3 or vcf)')

    # Parser for the 'chromosomer fastalength' part that calculates
    # lengths of sequences in the given FASTA file.
    fastalength_parser = subparsers.add_parser(
        'fastalength',
        description='Get lengths of sequences in the specified FASTA '
                    'file (required to build a fragment map).',
        help='get lengths of sequences from a FASTA file')

    # required arguments for the 'fastalength' routine
    fastalength_parser.add_argument(
        'fasta',
        help='a FASTA file which sequence lengths are to be obtained')
    fastalength_parser.add_argument(
        'output', help='an output file of sequence lengths')

    # Parser for the 'chromosomer simulator' routine
    simulator_parser = subparsers.add_parser(
        'simulator',
        description='Simulate fragments and test assembly for '
                    'testing purposes.',
        help='fragment simulator for testing purposes')

    # required arguments for the 'simulator' routine
    simulator_parser.add_argument(
        'fr_num', type=int, help='the number of chromosome fragments')
    simulator_parser.add_argument(
        'fr_len', type=int, help='the length of fragments')
    simulator_parser.add_argument(
        'chr_num', type=int, help='the number of chromosomes')
    simulator_parser.add_argument(
        'output_dir', help='the directory for output files')

    # optional arguments for the 'simulator' routine
    simulator_parser.add_argument(
        '-g', '--gap_size', type=int, default=2000,
        help='the size of gaps between fragments on a chromosome')
    # NOTE(review): no default is given, so args.unplaced is None when
    # the option is omitted -- confirm that Simulator accepts None.
    simulator_parser.add_argument(
        '-p', '--unplaced', type=int,
        help='the number of unplaced fragments')
    simulator_parser.add_argument(
        '--prefix', default='',
        help='the prefix for output file names')

    # Parser for the 'chromosomer agp2map' routine
    agp2map_parser = subparsers.add_parser(
        'agp2map',
        description='Convert an AGP file to the fragment map format.',
        help='convert an AGP file to a fragment map')

    # required arguments for the 'agp2map' routine
    agp2map_parser.add_argument('agp_file', help='an AGP file')
    agp2map_parser.add_argument(
        'output_file', help='the output fragment map file')

    return parser


def _configure_logging(debug):
    """
    Set logging levels and handlers according to the debug flag.

    :param debug: whether debugging messages were requested
    """
    if debug:
        # NOTE(review): no handler is attached here in the debug
        # mode -- presumably one is configured elsewhere; confirm
        # that debugging messages are actually shown.
        logger.setLevel(logging.DEBUG)
        return

    # a single stream handler with timestamped messages is shared by
    # the module-level logger and the logger named after this module
    handler = logging.StreamHandler()
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter('%(asctime)-15s - %(message)s',
                          '%Y-%m-%d %H:%M:%S'))

    logger.setLevel(logging.INFO)
    logger.propagate = False
    logger.addHandler(handler)

    logging.basicConfig()
    cli_logger = logging.getLogger(__name__)
    cli_logger.propagate = False
    cli_logger.addHandler(handler)
    cli_logger.setLevel(logging.INFO)


def _transfer_records(transferrer, records, write):
    """
    Transfer records one by one and write the transferred ones.

    :param transferrer: an object which ``feature`` method returns a
        transferred record or None if the record was not transferred
    :param records: an iterable of records to be transferred
    :param write: a callable that writes a transferred record
    :return: a tuple of the total number of processed records and
        the number of transferred records
    """
    total_count = transferred_count = 0
    for record in records:
        total_count += 1
        transferred_record = transferrer.feature(record)
        if transferred_record is not None:
            transferred_count += 1
            write(transferred_record)
    return total_count, transferred_count


def _run_transfer(args):
    """
    Run the 'transfer' routine: transfer annotated features from
    fragments to their assembly and log the numbers of transferred
    and skipped features.

    :param args: parsed command-line arguments of the routine
    """
    total_count = transferred_count = 0
    if args.format == 'bed':
        transferrer = BedTransfer(args.map)
        with open(args.annotation) as input_file:
            with bioformats.bed.Writer(args.output) as output_file:
                total_count, transferred_count = _transfer_records(
                    transferrer,
                    bioformats.bed.Reader(input_file).records(),
                    output_file.write)
    elif args.format == 'gff3':
        transferrer = Gff3Transfer(args.map)
        with open(args.annotation) as input_file:
            with bioformats.gff3.Writer(args.output) as output_file:
                total_count, transferred_count = _transfer_records(
                    transferrer,
                    bioformats.gff3.Reader(input_file).records(),
                    output_file.write)
    elif args.format == 'vcf':
        transferrer = VcfTransfer(args.map)
        # both files are managed by 'with' so that they are closed
        # even if reading or transferring a variant fails
        with open(args.annotation) as input_file:
            reader = vcf.Reader(input_file)
            with open(args.output, 'w') as output_file:
                writer = vcf.Writer(output_file, reader)
                total_count, transferred_count = _transfer_records(
                    transferrer, reader, writer.write_record)

    logger.info('%d features transferred', transferred_count)
    logger.info('%d features skipped', total_count - transferred_count)