Example #1
0
    def test_preprocess(self):
        """ Test the ability of mpathic.preprocess to collate data in multiple sequence files
        """

        print '\nIn test_preprocess...'
        file_names = glob.glob(self.input_dir + 'files_*.txt')

        # Make sure there are files to test
        self.assertTrue(len(file_names) > 0)

        for file_name in file_names:
            print '\t%s =' % file_name,
            description = file_name.split('_')[-1].split('.')[0]

            # If fasta or fastq, assume dna
            if ('fasta' in file_name) or ('fastq' in file_name):
                seq_type = 'dna'
            else:
                seq_type = None

            executable = lambda: preprocess.main(io.load_filelist(file_name),
                                                 indir=self.input_dir,
                                                 seq_type=seq_type)

            # If _good_, then preprocess.main should produce a valid df
            if ('_good' in file_name) or ('_fix' in file_name):
                try:
                    df = executable()
                    qc.validate_dataset(df)
                    out_file = self.output_dir + 'dataset_%s.txt' % description
                    io.write(df, out_file)  # Test write
                    io.load_dataset(out_file)  # Test loading
                    print 'good.'
                except:
                    print 'bad (ERROR).'
                    raise

            # If _bad, then preprocess.main should raise SortSeqError
            elif '_bad' in file_name:
                try:
                    self.assertRaises(SortSeqError, executable)
                    print 'badtype.'
                except:
                    print 'good (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')
Example #2
0
    def test_preprocess(self):
        """ Test the ability of mpathic.preprocess to collate data in multiple sequence files
        """

        print '\nIn test_preprocess...'
        file_names = glob.glob(self.input_dir+'files_*.txt')

        # Make sure there are files to test
        self.assertTrue(len(file_names)>0)

        for file_name in file_names:
            print '\t%s ='%file_name,
            description = file_name.split('_')[-1].split('.')[0]

            # If fasta or fastq, assume dna
            if ('fasta' in file_name) or ('fastq' in file_name):
                seq_type = 'dna'
            else:
                seq_type = None

            executable = lambda: preprocess.main(io.load_filelist(file_name),indir=self.input_dir, seq_type=seq_type)

            # If _good_, then preprocess.main should produce a valid df
            if ('_good' in file_name) or ('_fix' in file_name):
                try:
                    df = executable()
                    qc.validate_dataset(df)
                    out_file = self.output_dir+'dataset_%s.txt'%description
                    io.write(df,out_file)       # Test write
                    io.load_dataset(out_file)   # Test loading
                    print 'good.'
                except:
                    print 'bad (ERROR).'
                    raise

            # If _bad, then preprocess.main should raise SortSeqError
            elif '_bad' in file_name:
                try:
                    self.assertRaises(SortSeqError,executable)
                    print 'badtype.'
                except:
                    print 'good (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')
Example #3
0
def wrapper(args):
    """ Commandline wrapper for main()
    """  

    inloc = io.validate_file_for_reading(args.i) if args.i else sys.stdin
    outloc = io.validate_file_for_writing(args.out) if args.out else sys.stdout
    
    # Get filelist
    filelist_df = io.load_filelist(inloc)
    inloc.close()

    # Get tagkeys dataframe if provided
    if args.tagkeys:
        tagloc = io.validate_file_for_reading(args.tagkeys) 
        tags_df = io.load_tagkey(tagloc)
        tagloc.close()
    else:
        tags_df = None
    
    output_df = main(filelist_df,tags_df=tags_df,seq_type=args.seqtype)
    io.write(output_df,outloc,fast=args.fast)