Beispiel #1
0
def main():
    """Split a sequence data file into partitions and their complements.

    Command line: ``[options] <in-file>``.

    Options:
        -p, --partitions  number of partitions (default DEFAULT_PARTITIONS).
        --ids             only print the ids in each partition to stdout,
                          one partition per line; no files are saved.

    Without ``--ids``, partition ``i`` is saved to ``<in-file>.part<i>`` and
    its held-out complement to ``<in-file>.heldout_part<i>``, and a progress
    line is written to stderr for each pair.

    Exits with status 1 when no input file is given or when the requested
    number of partitions is less than 1.
    """
    usage = "%prog [options] <in-file>"
    parser = OptionParser(usage=usage)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", default=DEFAULT_PARTITIONS, help="the number of partitions to use (default: %d)" % DEFAULT_PARTITIONS)
    parser.add_option("--ids", dest="ids", action="store_true", help="don't output any files - just print out a list of the ids of the sequences in each partition")
    options, arguments = parser.parse_args()

    # sys.stderr.write / sys.stdout.write are used instead of the print
    # statement so the script behaves the same on Python 2 and Python 3.
    if not arguments:
        sys.stderr.write("You must specify an input data file\n")
        sys.exit(1)
    if options.partitions < 1:
        # A non-positive count cannot describe a split of the data; reject it
        # here rather than handing it to the partitioning helpers.
        sys.stderr.write("The number of partitions must be at least 1\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[0])

    # Read in the data file
    seqs = SequenceIndex.from_file(filename)

    # The doubled %% leaves a %d placeholder for the partition number
    part_pattern = "%s.part%%d" % filename
    heldout_pattern = "%s.heldout_part%%d" % filename
    # Divide the data up into partitions, with their complements
    parts = zip(partition(seqs.sequences, options.partitions), holdout_partition(seqs.sequences, options.partitions))
    # Save each partition and its complement
    for i, (part, heldout) in enumerate(parts):
        if options.ids:
            # Just print out a list of the ids in the partition
            sys.stdout.write(" ".join(["%d" % s.id for s in part]) + "\n")
        else:
            part_filename = part_pattern % i
            heldout_filename = heldout_pattern % i
            save_sequences(part_filename, part)
            save_sequences(heldout_filename, heldout)
            sys.stderr.write("Wrote partition %d to %s and %s\n" % (i, part_filename, heldout_filename))
Beispiel #2
0
def main():
    """Partition a sequence data file and write each part with its complement.

    Parses ``[options] <in-file>`` from the command line, loads the file via
    ``SequenceIndex.from_file`` and pairs every partition produced by
    ``partition`` with the matching result of ``holdout_partition``.

    * With ``--ids``: writes one line per partition to stdout containing the
      space-separated sequence ids; nothing is saved.
    * Otherwise: saves partition ``i`` to ``<in-file>.part<i>`` and its
      complement to ``<in-file>.heldout_part<i>``, reporting each pair on
      stderr.

    The process exits with status 1 if the input file argument is missing or
    if ``--partitions`` is smaller than 1.
    """
    usage = "%prog [options] <in-file>"
    parser = OptionParser(usage=usage)
    parser.add_option("-p",
                      "--partitions",
                      dest="partitions",
                      action="store",
                      type="int",
                      default=DEFAULT_PARTITIONS,
                      help="the number of partitions to use (default: %d)" %
                      DEFAULT_PARTITIONS)
    parser.add_option(
        "--ids",
        dest="ids",
        action="store_true",
        help=
        "don't output any files - just print out a list of the ids of the sequences in each partition"
    )
    options, arguments = parser.parse_args()

    # Explicit stream writes replace the Python-2-only print statement so the
    # function parses and runs under both Python 2 and Python 3.
    if not arguments:
        sys.stderr.write("You must specify an input data file\n")
        sys.exit(1)
    if options.partitions < 1:
        # Reject counts that cannot describe a split before touching the data
        sys.stderr.write("The number of partitions must be at least 1\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[0])

    # Read in the data file
    seqs = SequenceIndex.from_file(filename)

    # "%%d" survives the first formatting pass as "%d" for the part index
    part_pattern = "%s.part%%d" % filename
    heldout_pattern = "%s.heldout_part%%d" % filename
    # Divide the data up into partitions, with their complements
    parts = zip(partition(seqs.sequences, options.partitions),
                holdout_partition(seqs.sequences, options.partitions))
    # Save each partition and its complement
    for i, (part, heldout) in enumerate(parts):
        if options.ids:
            # Just print out a list of the ids in the partition
            id_line = " ".join(["%d" % s.id for s in part])
            sys.stdout.write(id_line + "\n")
        else:
            save_sequences(part_pattern % i, part)
            save_sequences(heldout_pattern % i, heldout)
            sys.stderr.write("Wrote partition %d to %s and %s\n" % (
                i, part_pattern % i, heldout_pattern % i))
Beispiel #3
0
def main():
    """Drop sequences that are not fully annotated from a data file, in place.

    Parses ``[options] <in-file>`` from the command line, loads the file via
    ``SequenceIndex.from_file``, keeps only the sequences whose
    ``fully_annotated`` attribute is true and saves the result back to the
    same path with ``save_sequences``. The number of removed sequences is
    reported on stderr.

    Exits with status 1 if no input file is given.
    """
    usage = "%prog [options] <in-file>"
    description = "Filter a sequence data file to remove any sequences "\
        "that are not fully annotated and write the result back to the file."
    parser = OptionParser(usage=usage, description=description)
    # No options are defined, so only the positional arguments are used
    _options, arguments = parser.parse_args()

    if not arguments:
        # sys.stderr.write works on Python 2 and 3; the old
        # "print >>sys.stderr" form raises TypeError on Python 3.
        sys.stderr.write("You must specify an input data file\n")
        sys.exit(1)
    in_filename = os.path.abspath(arguments[0])

    # Read in the data file
    seqs = SequenceIndex.from_file(in_filename)

    sequences = [seq for seq in seqs.sequences if seq.fully_annotated]
    # The filtered list overwrites the original input file
    save_sequences(in_filename, sequences)

    removed = len(seqs.sequences) - len(sequences)
    sys.stderr.write("Removed %d sequences\n" % removed)
Beispiel #4
0
def main():
    """Filter a sequence data file down to its fully annotated sequences.

    The single positional command-line argument names the data file. It is
    read with ``SequenceIndex.from_file``; every sequence whose
    ``fully_annotated`` attribute is false is discarded and the remaining
    sequences are written back to the same file via ``save_sequences``.
    A count of the discarded sequences is written to stderr.

    Exits with status 1 when the input file argument is missing.
    """
    usage = "%prog [options] <in-file>"
    description = "Filter a sequence data file to remove any sequences "\
        "that are not fully annotated and write the result back to the file."
    parser = OptionParser(usage=usage, description=description)
    # The parsed option values are unused: the parser declares no options
    _options, arguments = parser.parse_args()

    # Writing to the stream directly (rather than "print >> sys.stderr")
    # keeps the messages working under both Python 2 and Python 3.
    if not arguments:
        sys.stderr.write("You must specify an input data file\n")
        sys.exit(1)
    in_filename = os.path.abspath(arguments[0])

    # Read in the data file
    seqs = SequenceIndex.from_file(in_filename)

    sequences = [seq for seq in seqs.sequences if seq.fully_annotated]
    # Save over the input file, as the description promises
    save_sequences(in_filename, sequences)

    sys.stderr.write("Removed %d sequences\n" %
                     (len(seqs.sequences) - len(sequences)))