help='''SQL expression to filter the query which selects the sequences in the database. Default is to export all sequences in database. Basic query is: SELECT seqid, seq, phred FROM seqs INNER JOIN samples ON seqs.sampleId=samples.sampleId WHERE <filter_expression> ''') parser.add_argument('-s', dest='startidx', default=0, help='Starting base index of DNA sequences that are written to file, used to miss out cutsite if desired.') parser.add_argument('-f', dest='format', default='fasta', help='Format of file written to output.') parser.add_argument('-b', dest='rowbuffer', default=100000, help='Read write buffer. Number of records to read before writing to file.') parser.add_argument('-F', dest='overwrite', default=False, help='Overwrite any file with same name as output.') args = parser.parse_args() # Write records to output db = Reads_db(args.input, recbyname=True) fastafile_handle = db.write_reads(args.output, output_format=args.format, filter_expression=args.filter_expression, startidx=args.startidx, rowbuffer=args.rowbuffer, overwrite=args.overwrite)
WHERE <filter_expression> ''') parser.add_argument('-s', dest='startidx', default=0, help='Starting base index of DNA sequences that are written to file, used to miss out cutsite if desired.') parser.add_argument('-f', dest='format', default='fasta', help='Format of file written to output.') print sys.argv args = parser.parse_args() # Write records to output db = Reads_db(args.input, recbyname=True) clusters_list = db.get_cluster_by_size() fastafile_handle = db.write_reads(args.output, format=args.format, filter_expression=args.filter_expression, startidx=args.startidx) if args.query_expression: fastafile_handle = db.write_reads(args.pattern, args.output, use_type_column=args.typeflag, format='fasta')