def generate_strings(basename, fields, tuples, datarange):
    """
    First attribute is integer and the rest are strings

    Writes `tuples` space-delimited rows to the file named by
    get_name(basename, fields) and then runs indexing() on that file.
    Each row is a random integer in [0, datarange] followed by
    `fields - 1` random strings of 1 to 24 characters drawn from
    uppercase ASCII letters and digits.  When there is at least one
    string column, one randomly chosen row carries the known string
    "coffee" in its first string column.

    The random generator is re-seeded with 1 on every call, so the same
    arguments yield the same sequence of random draws.

    basename, fields -- forwarded to get_name() to build the file name
    tuples           -- number of rows; 0 (or less) writes an empty file
    datarange        -- inclusive upper bound of the integer attribute
    """
    random.seed(1)
    fn = get_name(basename, fields)

    known_str = "coffee"
    strmax = 24
    # built once; the original rebuilt this string for every character
    alphabet = string.ascii_uppercase + string.digits

    with open(fn, 'w') as f:
        # single-argument parenthesised print behaves the same on Python 2 and 3
        print("generating %s" % (os.path.abspath(fn)))

        # randint(0, -1) raises ValueError, so only draw a row when one exists;
        # -1 never matches a row index
        if tuples > 0:
            tuple_with_known = random.randint(0, tuples - 1)
        else:
            tuple_with_known = -1

        for i in range(tuples):
            # NOTE: the order of random draws below matches the original
            # (integer first, then per column: length, then characters), so
            # seeded output is unchanged.
            row = [str(random.randint(0, datarange))]
            for j in range(1, fields):
                if i == tuple_with_known and j == 1:
                    row.append(known_str)
                else:
                    strlength = random.randint(1, strmax)
                    row.append(''.join(random.choice(alphabet)
                                       for _ in range(strlength)))
            f.write(' '.join(row) + "\n")

    # run after the with-block so the data file is flushed and closed
    indexing(fn, ' ')
def generate_strings(basename, fields, tuples, datarange):
    """
    First attribute is integer and the rest are strings

    Writes `tuples` space-delimited rows to the file named by
    get_name(basename, fields) and then runs indexing() on that file.
    Each row is a random integer in [0, datarange] followed by
    `fields - 1` random strings of 1 to 24 uppercase ASCII letters.
    When there is at least one string column, one randomly chosen row
    carries the known string "coffee" in its first string column.

    The random generator is re-seeded with 1 on every call, so the same
    arguments yield the same sequence of random draws.

    basename, fields -- forwarded to get_name() to build the file name
    tuples           -- number of rows; 0 (or less) writes an empty file
    datarange        -- inclusive upper bound of the integer attribute
    """
    random.seed(1)
    fn = get_name(basename, fields)

    known_str = "coffee"
    strmax = 24
    alphabet = string.ascii_uppercase

    with open(fn, 'w') as f:
        # single-argument parenthesised print behaves the same on Python 2 and 3
        print("generating %s" % (os.path.abspath(fn)))

        # randint(0, -1) raises ValueError, so only draw a row when one exists;
        # -1 never matches a row index
        if tuples > 0:
            tuple_with_known = random.randint(0, tuples - 1)
        else:
            tuple_with_known = -1

        for i in range(tuples):
            # NOTE: the order of random draws below matches the original
            # (integer first, then per column: length, then characters), so
            # seeded output is unchanged.
            row = [str(random.randint(0, datarange))]
            for j in range(1, fields):
                if i == tuple_with_known and j == 1:
                    row.append(known_str)
                else:
                    strlength = random.randint(1, strmax)
                    row.append(''.join(random.choice(alphabet)
                                       for _ in range(strlength)))
            f.write(' '.join(row) + "\n")

    # run after the with-block so the data file is flushed and closed
    indexing(fn, ' ')
# Two collections of files: those to upload and those to soft-link.
upload_files, link_files = [], []


def add_data_file(f):
    """Append f to link_files when args.softlink_data is set, else to upload_files."""
    destination = link_files if args.softlink_data else upload_files
    destination.append(f)


if not args.ext_index:
    # no external index requested: the input file is used as-is
    datafile = inputf
else:
    assert not args.splits, "--splits and --external-string-indexing not currently compatible"
    task_message("indexing")
    datafile, indexfile = indexing(inputf)
    # the index file goes straight onto the upload list (not via add_data_file)
    upload_files.append(indexfile)

if args.storage == 'binary':
    assert not args.splits, "--splits and --storage=binary not currently compatible"
    # TODO: have an option to use Grappa to index the strings
    # see $GRAPPA_HOME/build/Make+Release/applications/join/convert2bin.exe
    task_message("generating binary converter")
    cat, rel_key, convert_cpp_name = generate_tuple_class_from_file(
        args.relation_name,
        catalogfile)
    # TODO: rel_key is wrong!! It is public:adhoc:x but only x is needed.
def task_message(s):
    """Print s followed by "..." as a progress message."""
    print("{0}...".format(s))


# Two collections of files: those to upload and those to soft-link.
upload_files, link_files = [], []


def add_data_file(f):
    """Append f to link_files when args.softlink_data is set, else to upload_files."""
    destination = link_files if args.softlink_data else upload_files
    destination.append(f)


if not args.ext_index:
    # no external index requested: the input file is used as-is
    datafile = inputf
else:
    assert not args.splits, "--splits and --external-string-indexing not currently compatible"
    task_message("indexing")
    datafile, indexfile = indexing(inputf)
    # the index file goes straight onto the upload list (not via add_data_file)
    upload_files.append(indexfile)

if args.storage == 'binary':
    assert not args.splits, "--splits and --storage=binary not currently compatible"
    # TODO: have an option to use Grappa to index the strings
    # see $GRAPPA_HOME/build/Make+Release/applications/join/convert2bin.exe
    task_message("generating binary converter")
    cat, rel_key, convert_cpp_name = generate_tuple_class_from_file(
        args.relation_name,
        catalogfile)