# Imports assumed by these snippets; util, importCls, Merge, merge and
# generateFile are project-local modules from the surrounding repository.
import csv
import json
import os
import shutil

import pandas


def memconservingmerge(filepaths, keyset, outfile):
    """
    Gathers every distinct keyset combination from all files, then merges the matching records into one file.
    For this function memory usage was a more important criterion than speed, so it doesn't load all
    files into memory at the same time; instead, the files have to be opened and read multiple times to
    guarantee the intended result.
    :param filepaths: list of paths to the input files
    :param keyset: list of key names that identify matching records
    :param outfile: path of the merged output file
    :return:
    """
    keygroups = list()
    for filepath in filepaths:
        with open(filepath, "r") as file:
            file.readline()  # skip the header line
            try:
                while True:
                    # parseline raises at end of file, which ends the loop via the except below
                    obj = util.parseline(file.readline())
                    for x in obj.keys():
                        obj[x] = obj[x]['value']  # flatten to plain values
                    obj2 = keysonly(obj, keyset)
                    if obj2 not in keygroups:
                        keygroups.append(obj2)
            except Exception as err:
                print(err)
    print('Final Keys: ' + str(keygroups) + '; length: ' + str(len(keygroups)))
    importCls.start_file(outfile)
    for k in keygroups:
        tempobj = k
        for filepath in filepaths:
            with open(filepath, "r") as file:
                file.readline()  # skip the header line
                try:
                    while True:
                        obj = util.parseline(file.readline())
                        for x in obj.keys():
                            obj[x] = obj[x]['value']
                        if keyalign(k, obj, keyset):
                            tempobj = mergelinerisky([tempobj, obj])
                except Exception:
                    pass  # end of file reached
        print('outobj: ' + str(tempobj))
        try:
            # wrap each value back into the {'value': ..., 'validated': ...} record format
            x = dict()
            for y in tempobj.keys():
                x[y] = dict()
                x[y]['value'] = tempobj[y]
                x[y]['validated'] = True
            importCls.forward(x, outfile)
        except Exception:
            pass  # skip records that cannot be forwarded
    importCls.end_file(outfile)
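# Illustrative usage sketch (added; the file and key names are hypothetical).
# Inputs are assumed to be line-per-record JSON files in this project's import
# format, where each line maps a column name to a {'value', 'validated'} dict.
def examplememconservingmerge():
    memconservingmerge(
        ['customers_a.jayson', 'customers_b.jayson'],  # re-read once per key group: low memory, slower
        ['firstname', 'lastname'],                     # records agreeing on these keys are merged
        'customers_merged.jayson',                     # output framed by importCls.start_file/end_file
    )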
def exportcsvfromfile(source, outfile):
    """
    Exports the given data, which should be a JSON file in the import format (expect errors if it isn't),
    as a CSV file. The first row contains the column names, all following rows contain the values.
    :param source: JSON file created by the import
    :param outfile: location of the CSV file to write
    :return:
    """
    delimiter = ';'
    # the default delimiter can be overridden via the output configuration file
    with open('../resources/outconfig.json') as file:
        try:
            conf = json.loads(file.read())
            delimiter = conf['delimiter']
        except Exception as err:
            print(err)
    firstset = True
    print("Exporting as CSV")
    with open(source, 'r') as sauce:
        with open(outfile, "w", newline='') as file:
            w = csv.writer(file, delimiter=delimiter)
            sauce.readline()  # skip the header line
            try:
                while True:
                    x = util.parseline(sauce.readline())
                    if firstset:
                        w.writerow(x.keys())
                        firstset = False
                    arr = []
                    for y in x.values():
                        arr.append(y['value'])
                    print("Exporting line " + json.dumps(arr))
                    w.writerow(arr)
            except Exception as err:
                print(err)
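# For reference, a minimal ../resources/outconfig.json as read above could look
# like the following sketch; only the 'delimiter' key is actually used here:
#
# { "delimiter": ";" }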
def fullmergefiles(filepaths, keyset, outfile):
    """
    Counterpart to memconservingmerge. Doesn't care about memory usage at all, but is significantly faster since
    it only reads each file once and keeps all the values in RAM.
    :param filepaths: list of paths to the input files
    :param keyset: list of key names that identify matching records
    :param outfile: path of the merged output file
    :return:
    """
    objects = list()
    for filepath in filepaths:
        with open(filepath, "r") as file:
            file.readline()  # skip the header line
            try:
                while True:
                    objects.append(util.parseline(file.readline()))
            except Exception as err:
                print(err)
    tmp = gatherkeys(objects, keyset)
    out = []
    for x in tmp:
        # merge every record that shares this key combination into one
        tmp2 = getkeygroup(objects, x)
        out.append(mergelinerisky(tmp2))
    importCls.start_file(outfile)
    for x in out:
        importCls.forward(x, outfile)
    importCls.end_file(outfile)
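# Sketch (added for illustration): fullmergefiles takes the same arguments as
# memconservingmerge, so choosing between them is purely a RAM/speed trade-off.
def examplepickmerge(filepaths, keyset, outfile, lowmemory):
    if lowmemory:
        memconservingmerge(filepaths, keyset, outfile)  # slow, re-reads every file per key group
    else:
        fullmergefiles(filepaths, keyset, outfile)      # fast, keeps all records in RAM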
def importxlsxmerge(infile, outfile, keyset):
    """
    Does the same as the other xlsx import, but with the slight difference that it assumes there's already a JSON
    structure at the outfile location. I recommend checking whether a file exists at the "outfile" location and
    then using either this method or importxlsx (see the sketch after this function).
    :param infile: path of the xlsx file to import
    :param outfile: path of the existing JSON file to merge into
    :param keyset: list of key names that identify matching records
    :return:
    """
    newfile = pandas.ExcelFile(infile)
    file = pandas.read_excel(infile, sheet_name=newfile.sheet_names[0])
    data = file.to_dict()
    print('\n' + str(data))
    keys = data.keys()
    # the number of rows equals the number of index keys in any column dict
    length = 0
    for x in keys:
        length = len(data[x].keys())
        break
    arr = list()
    for num in range(0, length):
        obj = dict()
        for y in keys:
            z = y.strip()  # strip whitespace from the column name
            obj[z] = dict()
            obj[z]['value'] = data[y][num]  # index with the original key, not the stripped one
            obj[z]['validated'] = False
        arr.append(obj)
    print(arr)
    prevarr = list()
    with open(outfile) as file:
        string = file.readline()
        obj = util.parsefirstline(string)  # header metadata: cc, locked, tablename, rules
        try:
            while True:
                prevarr.append(util.parseline(file.readline()))
        except Exception as err:
            print(err)
    for x in prevarr:
        align = False
        for i, y in enumerate(arr):
            if Merge.keyalign(x, y, keyset):
                align = True
                # store the merged record back into arr, otherwise the result would be lost
                arr[i] = Merge.mergelinerisky([y, x])
                break
        if not align:
            arr.append(x)
    start_file(outfile, obj['cc'], obj['locked'], obj['tablename'], obj['rules'])
    for x in arr:
        forward(x, outfile)
    end_file(outfile)
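# Sketch of the existence check recommended in the docstring above; the exact
# signature of importxlsx is an assumption, not confirmed by the source.
def exampleimportdispatch(infile, outfile, keyset):
    if os.path.exists(outfile):
        importxlsxmerge(infile, outfile, keyset)  # merge into the existing JSON structure
    else:
        importxlsx(infile, outfile, keyset)       # assumed counterpart that creates a fresh file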
def testmemconservingmerge():
    # create the tmp directory next to this script; the relative 'tmp/...' paths
    # below assume the working directory is this script's directory
    os.makedirs(os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp"))
    args = dict()
    args['param'] = json.loads('[{"propname":"firstname","generator":"name"},{"propname":"lastname","generator":"name"},{"propname":"bla","generator":"randchar8"},{"propname":"wtf","generator":"randchar8"},{"propname":"wtf2","generator":"randint8"},{"propname":"wtf3","generator":"randint6"}]')
    args['lines'] = 10
    args['return'] = True
    tmp = generateFile.lel(args)
    # write a dummy first line to each input file, since the merge skips the first line
    with open('tmp/data1.jayson', "a") as file:
        file.write(',\n')
    with open('tmp/data2.jayson', "a") as file:
        file.write(',\n')
    for y in tmp:
        # split each generated record into two overlapping key subsets, one per input
        # file, so the merge has to recombine them via the shared firstname/lastname keys
        obj1 = merge.keysonly(y, ['firstname', 'lastname', 'bla', 'wtf'])
        for x in obj1.keys():
            tmp1 = obj1[x]
            obj1[x] = dict()
            obj1[x]['value'] = tmp1
        obj2 = merge.keysonly(y, ['firstname', 'lastname', 'wtf2', 'wtf3'])
        for x in obj2.keys():
            tmp1 = obj2[x]
            obj2[x] = dict()
            obj2[x]['value'] = tmp1
        with open('tmp/data1.jayson', "a") as file:
            file.write(json.dumps(obj1) + ',\n')
        with open('tmp/data2.jayson', "a") as file:
            file.write(json.dumps(obj2) + ',\n')
    open('tmp/data.jayson', "a").close()  # create the (empty) output file
    merge.memconservingmerge(['tmp/data1.jayson', 'tmp/data2.jayson'], ['firstname', 'lastname'], 'tmp/data.jayson')
    res = list()
    with open('tmp/data.jayson', "r") as file:
        file.readline()
        try:
            while True:
                obj = util.parseline(file.readline())
                res.append(obj)
        except Exception as err:
            print('test 299: ' + str(err))
    print('length: ' + str(len(res)) + '; result list: ' + str(res))
    print('length: ' + str(len(tmp)) + '; source list: ' + str(tmp))
    exists = True
    for x in tmp:
        # rebuild the expected output record: every key wrapped into a
        # {'value': ..., 'validated': True} dict, as written by the merge
        y = dict()
        for k in x.keys():
            y[k] = dict()
            y[k]['value'] = x[k]
            y[k]['validated'] = True
        if y not in res:
            exists = False
    shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp"))
    assert exists