예제 #1
0
def generate_strings(basename, fields, tuples, datarange):
    """
    Write a space-delimited file of random tuples and then index it.

    The first attribute of every tuple is a random integer in
    [0, datarange]; the remaining ``fields - 1`` attributes are random
    strings of 1 to 24 characters drawn from uppercase ASCII letters and
    digits.  One randomly chosen tuple carries the known string "coffee"
    as its second attribute.  The generator is seeded with 1, so repeated
    runs on the same interpreter produce the same file.

    basename  -- passed with ``fields`` to get_name() to build the file name
    fields    -- total number of attributes per tuple
    tuples    -- number of tuples (lines) to write; 0 yields an empty file
    datarange -- inclusive upper bound of the integer attribute
    """
    random.seed(1)
    fn = get_name(basename, fields)
    known_str = "coffee"
    strmax = 24
    alphabet = string.ascii_uppercase + string.digits
    with open(fn, 'w') as f:
        # Parenthesised single-argument form prints the same text whether
        # print is a statement (Python 2) or a function (Python 3).
        print("generating %s" % (os.path.abspath(fn)))
        # randint(0, -1) raises ValueError, so only pick a tuple if any exist.
        tuple_with_known = random.randint(0, tuples - 1) if tuples > 0 else -1
        for i in range(tuples):
            # Random calls stay in the original order (integer, then for each
            # string its length followed by its characters) so the seeded
            # output is unchanged.
            row = [str(random.randint(0, datarange))]
            for j in range(1, fields):
                if i == tuple_with_known and j == 1:
                    row.append(known_str)
                else:
                    strlength = random.randint(1, strmax)
                    row.append(''.join(random.choice(alphabet)
                                       for _ in range(strlength)))
            f.write(' '.join(row) + "\n")

    indexing(fn, ' ')
예제 #2
0
def generate_strings(basename, fields, tuples, datarange):
    """
    Write a space-delimited file of random tuples and then index it.

    The first attribute of every tuple is a random integer in
    [0, datarange]; the remaining ``fields - 1`` attributes are random
    strings of 1 to 24 uppercase ASCII letters.  One randomly chosen tuple
    carries the known string "coffee" as its second attribute.  The
    generator is seeded with 1, so repeated runs on the same interpreter
    produce the same file.

    basename  -- passed with ``fields`` to get_name() to build the file name
    fields    -- total number of attributes per tuple
    tuples    -- number of tuples (lines) to write; 0 yields an empty file
    datarange -- inclusive upper bound of the integer attribute
    """
    random.seed(1)
    fn = get_name(basename, fields)
    known_str = "coffee"
    strmax = 24
    alphabet = string.ascii_uppercase
    with open(fn, 'w') as f:
        # Parenthesised single-argument form prints the same text whether
        # print is a statement (Python 2) or a function (Python 3).
        print("generating %s" % (os.path.abspath(fn)))
        # randint(0, -1) raises ValueError, so only pick a tuple if any exist.
        tuple_with_known = random.randint(0, tuples - 1) if tuples > 0 else -1
        for i in range(tuples):
            # Random calls stay in the original order (integer, then for each
            # string its length followed by its characters) so the seeded
            # output is unchanged.
            row = [str(random.randint(0, datarange))]
            for j in range(1, fields):
                if i == tuple_with_known and j == 1:
                    row.append(known_str)
                else:
                    strlength = random.randint(1, strmax)
                    row.append(''.join(random.choice(alphabet)
                                       for _ in range(strlength)))
            f.write(' '.join(row) + "\n")

    indexing(fn, ' ')
예제 #3
0
# Files queued for upload and files queued for soft-linking, respectively.
upload_files, link_files = [], []


def add_data_file(f):
    """Queue data file *f*: soft-link it if args.softlink_data is set, else upload it."""
    destination = link_files if args.softlink_data else upload_files
    destination.append(f)


# Choose the data file: the raw input, or the output of external string
# indexing (whose index file is appended directly to upload_files).
if not args.ext_index:
    datafile = inputf
else:
    assert not args.splits, "--splits and --external-string-indexing not currently compatible"
    task_message("indexing")
    datafile, indexfile = indexing(inputf)
    upload_files.append(indexfile)

# Binary storage: generate a tuple class / converter for the relation from
# its catalog file.  This path is asserted to be incompatible with --splits.
if args.storage == 'binary':
    assert not args.splits, "--splits and --storage=binary not currently compatible"

    # TODO: have an option to use Grappa to index the strings
    # see $GRAPPA_HOME/build/Make+Release/applications/join/convert2bin.exe

    task_message("generating binary converter")
    # NOTE(review): convert_cpp_name is presumably the generated C++ source
    # name -- confirm against generate_tuple_class_from_file.
    cat, rel_key, convert_cpp_name = generate_tuple_class_from_file(
        args.relation_name, catalogfile)

    # TODO: rel_key is wrong! It is "public:adhoc:x" but only "x" is needed.
예제 #4
0
def task_message(s):
    """Print the progress message *s* followed by an ellipsis ("...")."""
    # Parenthesised single-argument form prints the same text whether print
    # is a statement (Python 2) or a function (Python 3).
    print("{0}...".format(s))

# Files queued for upload and files queued for soft-linking, respectively.
upload_files, link_files = [], []


def add_data_file(f):
    """Queue data file *f*: soft-link it if args.softlink_data is set, else upload it."""
    destination = link_files if args.softlink_data else upload_files
    destination.append(f)

# Choose the data file: the raw input, or the output of external string
# indexing (whose index file is appended directly to upload_files).
if not args.ext_index:
    datafile = inputf
else:
    assert not args.splits, "--splits and --external-string-indexing not currently compatible"
    task_message("indexing")
    datafile, indexfile = indexing(inputf)
    upload_files.append(indexfile)

# Binary storage: generate a tuple class / converter for the relation from
# its catalog file.  This path is asserted to be incompatible with --splits.
if args.storage == 'binary':
    assert not args.splits, "--splits and --storage=binary not currently compatible"

    # TODO: have an option to use Grappa to index the strings
    # see $GRAPPA_HOME/build/Make+Release/applications/join/convert2bin.exe

    task_message("generating binary converter")
    # NOTE(review): convert_cpp_name is presumably the generated C++ source
    # name -- confirm against generate_tuple_class_from_file.
    cat, rel_key, convert_cpp_name = generate_tuple_class_from_file(
        args.relation_name,
        catalogfile)