def main():
    """Command-line entry point: save a selection of chord sequences.

    Parses the command line, builds a query selecting every sequence whose
    ``analysis_omitted`` flag is false, and passes it to
    ``save_pickled_data`` together with the output filename given as the
    first positional argument.

    Options:
        -r, --reannotated  also include sequences flagged ``alternative``
                           (reannotations of other sequences).
        -p, --partial      also include sequences that are only partly
                           annotated.
        -n, --no-names     forwarded as ``no_names`` to obscure the names
                           of the chord sequences.

    Exits with status 1, after a message on stderr, when no output file is
    given; exits with status 0 once ``save_pickled_data`` has returned.
    """
    usage = "%prog <out-file>"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-r", "--reannotated", dest="reannotated", action="store_true",
        help="include sequences that are reannotations of others")
    parser.add_option(
        "-p", "--partial", dest="partial", action="store_true",
        help="include sequences that are only partly annotated")
    parser.add_option(
        "-n", "--no-names", dest="no_names", action="store_true",
        help="obscure names of the chord sequences")
    options, arguments = parser.parse_args()

    if not arguments:
        # Written with write() rather than the print statement so the
        # module parses on both Python 2 and Python 3.
        sys.stderr.write(
            "You must specify an output file as the first argument\n")
        sys.exit(1)
    filename = arguments[0]

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Storing all sequences except those marked as unanalysed")

    # Sequences marked as unanalysed are always left out.
    q = Q(analysis_omitted=False)
    if not options.reannotated:
        # Leave out reannotations unless they were explicitly requested.
        q = q & Q(alternative=False)

    def is_fully_annotated(seq):
        """Return the sequence's ``fully_annotated`` attribute."""
        return seq.fully_annotated

    # With --partial no per-sequence filter is applied at all.
    annotation_filter = None if options.partial else is_fully_annotated

    save_pickled_data(
        filename, query=q, filter=annotation_filter,
        no_names=options.no_names)
    sys.exit(0)
def main():
    """Command-line entry point: save a selection of chord sequences.

    Parses the command line, builds a query selecting every sequence whose
    ``analysis_omitted`` flag is false, and passes it to
    ``save_pickled_data`` together with the output filename given as the
    first positional argument.

    Options:
        -r, --reannotated  also include sequences flagged ``alternative``
                           (reannotations of other sequences).
        -p, --partial      also include sequences that are only partly
                           annotated.
        -n, --no-names     forwarded as ``no_names`` to obscure the names
                           of the chord sequences.

    Exits with status 1, after a message on stderr, when no output file is
    given; exits with status 0 once ``save_pickled_data`` has returned.
    """
    usage = "%prog <out-file>"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-r", "--reannotated", dest="reannotated", action="store_true",
        help="include sequences that are reannotations of others")
    parser.add_option(
        "-p", "--partial", dest="partial", action="store_true",
        help="include sequences that are only partly annotated")
    parser.add_option(
        "-n", "--no-names", dest="no_names", action="store_true",
        help="obscure names of the chord sequences")
    options, arguments = parser.parse_args()

    if not arguments:
        # Written with write() rather than the print statement so the
        # module parses on both Python 2 and Python 3.
        sys.stderr.write(
            "You must specify an output file as the first argument\n")
        sys.exit(1)
    filename = arguments[0]

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Storing all sequences except those marked as unanalysed")

    # Sequences marked as unanalysed are always left out.
    q = Q(analysis_omitted=False)
    if not options.reannotated:
        # Leave out reannotations unless they were explicitly requested.
        q = q & Q(alternative=False)

    def is_fully_annotated(seq):
        """Return the sequence's ``fully_annotated`` attribute."""
        return seq.fully_annotated

    # With --partial no per-sequence filter is applied at all.
    annotation_filter = None if options.partial else is_fully_annotated

    save_pickled_data(
        filename, query=q, filter=annotation_filter,
        no_names=options.no_names)
    sys.exit(0)
# Script: split the fully annotated chord sequences into NUM_PARTITIONS
# partitions and, for each one, write two files through save_pickled_data:
# "<FILENAME>-<i>" for the partition and "<FILENAME>-<i>-heldout" for the
# rest of the data.
#
# ChordSequence and save_pickled_data are used below, so they are imported
# here explicitly rather than relying on some earlier code to define them.
from apps.sequences.datautils import save_pickled_data
from apps.sequences.models import ChordSequence
from django.db.models import Q
from jazzparser.utils.data import holdout_partition, partition
import os.path
import sys

NUM_PARTITIONS = 10
FILENAME = "partition"

# Build a list of the sequences to put in each partition.
# Only include fully annotated sequences.
# write() is used instead of the print statement so the script parses on
# both Python 2 and Python 3; the text written is unchanged.
sys.stderr.write("Building list of fully annotated sequences\n")
seqs = [
    seq.id
    for seq in ChordSequence.objects.filter(analysis_omitted=False)
    if seq.fully_annotated
]

# Pair each partition with its held-out counterpart.
# NOTE(review): presumably holdout_partition() yields, for partition i,
# everything that is not in partition i; confirm in jazzparser.utils.data.
partitions = zip(partition(seqs, NUM_PARTITIONS),
                 holdout_partition(seqs, NUM_PARTITIONS))

for i, (part, rest) in enumerate(partitions):
    # Output two files for each partition.
    part_file = "%s-%d" % (FILENAME, i)
    held_file = "%s-%d-heldout" % (FILENAME, i)
    sys.stderr.write(
        "Outputing partition %d to %s and %s\n" % (i, part_file, held_file))
    # Output the partition's file.
    save_pickled_data(part_file, Q(id__in=part))
    # Output the rest of the data.
    save_pickled_data(held_file, Q(id__in=rest))
# Script: split the fully annotated chord sequences into NUM_PARTITIONS
# partitions and, for each one, write two files through save_pickled_data:
# "<FILENAME>-<i>" for the partition and "<FILENAME>-<i>-heldout" for the
# rest of the data.
from apps.sequences.datautils import save_pickled_data
from apps.sequences.models import ChordSequence
from django.db.models import Q
from jazzparser.utils.data import holdout_partition, partition
import os.path
import sys

NUM_PARTITIONS = 10
FILENAME = "partition"

# Build a list of the sequences to put in each partition.
# Only include fully annotated sequences.
# write() is used instead of the print statement so the script parses on
# both Python 2 and Python 3; the text written is unchanged.
sys.stderr.write("Building list of fully annotated sequences\n")
seqs = [
    seq.id
    for seq in ChordSequence.objects.filter(analysis_omitted=False)
    if seq.fully_annotated
]

# Pair each partition with its held-out counterpart.
# NOTE(review): presumably holdout_partition() yields, for partition i,
# everything that is not in partition i; confirm in jazzparser.utils.data.
partitions = zip(partition(seqs, NUM_PARTITIONS),
                 holdout_partition(seqs, NUM_PARTITIONS))

for i, (part, rest) in enumerate(partitions):
    # Output two files for each partition.
    part_file = "%s-%d" % (FILENAME, i)
    held_file = "%s-%d-heldout" % (FILENAME, i)
    sys.stderr.write(
        "Outputing partition %d to %s and %s\n" % (i, part_file, held_file))
    # Output the partition's file.
    save_pickled_data(part_file, Q(id__in=part))
    # Output the rest of the data.
    save_pickled_data(held_file, Q(id__in=rest))