コード例 #1
0
ファイル: merge.py プロジェクト: mwintersperger-tgm/prototype
def _unwrap_values(record):
    """Replace every ``{'value': ...}`` cell of *record* with its bare value, in place."""
    for name in record.keys():
        record[name] = record[name]['value']
    return record


def memconservingmerge(filepaths, keyset, outfile):
    """
    Merge the records of several files that agree on the fields named in ``keyset``.

    Memory usage was considered more important than speed here: the files are never
    loaded completely. A first pass collects every distinct key combination, then
    each combination triggers another pass over all files to merge the matching
    records, so every file is read ``1 + number of distinct keys`` times.

    :param filepaths: paths of the input files; the first line of each file is skipped
    :param keyset: field names handed to ``keysonly`` / ``keyalign`` to match records
    :param outfile: target handed to ``importCls.start_file`` / ``forward`` / ``end_file``
    :return: None
    """
    key = list()
    # Pass 1: collect every distinct key combination found in any file.
    for filepath in filepaths:
        with open(filepath, "r") as file:
            file.readline()  # the first line is not a record
            try:
                while True:
                    obj = _unwrap_values(util.parseline(file.readline()))
                    obj2 = keysonly(obj, keyset)
                    if obj2 not in key:
                        key.append(obj2)
            except Exception as err:
                # The read loop can only end through an exception; presumably
                # util.parseline rejects the end-of-file line (TODO confirm).
                print(err)
    print('Final Keys: ' + str(key) + '; length: ' + str(len(key)))
    importCls.start_file(outfile)
    # Pass 2: for each key combination, re-read all files and merge the matches.
    for k in key:
        tempobj = k
        for filepath in filepaths:
            with open(filepath, "r") as file:
                file.readline()
                try:
                    while True:
                        obj = _unwrap_values(util.parseline(file.readline()))
                        if keyalign(k, obj, keyset):
                            tempobj = mergelinerisky([tempobj, obj])
                except Exception:
                    # Same loop terminator as in pass 1; kept quiet on purpose so
                    # the message is not repeated once per key and file.
                    pass
        print('outobj: ' + str(tempobj))
        try:
            # Re-wrap every field in the {'value', 'validated'} cell layout.
            x = {name: {'value': tempobj[name], 'validated': True} for name in tempobj.keys()}
            importCls.forward(x, outfile)
        except Exception as err:
            # Report the record that could not be written instead of dropping it silently.
            print(err)
    # No extra file handle is opened here: end_file receives the path itself.
    importCls.end_file(outfile)
コード例 #2
0
def exportcsvfromfile(source, outfile):
    """
    Export the records stored in ``source`` as a CSV file.

    The first line of ``source`` is skipped; every following line is parsed with
    ``util.parseline``. The first CSV row holds the column names (the keys of the
    first record), every further row holds the ``'value'`` entry of each field.
    The delimiter defaults to ``;`` and is overridden by the ``delimiter`` entry of
    ``../resources/outconfig.json`` when that file can be parsed.

    :param source: json file created by import
    :param outfile: location of the CSV file; existing content is replaced
    :return: None
    """
    delimiter = ';'
    with open('../resources/outconfig.json') as file:
        try:
            conf = json.loads(file.read())
            delimiter = conf['delimiter']
        except Exception as err:
            # Unreadable config: keep the default delimiter.
            print(err)
    firstset = True
    print("Exporting as CSV")
    with open(source, 'r') as sauce:
        # "w" replaces the former append + truncate(0); newline='' is required by
        # the csv module so rows are not terminated with "\r\r\n" on Windows.
        with open(outfile, "w", newline='') as file:
            w = csv.writer(file, delimiter=delimiter)
            sauce.readline()  # the first line is not a record
            try:
                while True:
                    x = util.parseline(sauce.readline())
                    if firstset:
                        w.writerow(x.keys())
                        firstset = False
                    arr = [y['value'] for y in x.values()]
                    print("Exporting line " + json.dumps(arr))
                    w.writerow(arr)
            except Exception as err:
                # The read loop can only end through an exception; presumably
                # util.parseline rejects the end-of-file line (TODO confirm).
                print(err)
コード例 #3
0
ファイル: merge.py プロジェクト: mwintersperger-tgm/prototype
def fullmergefiles(filepaths, keyset, outfile):
    """
    Counterpart to memconservingmerge that trades memory for speed: each file is
    read exactly once and all parsed records are kept in a single list before
    they are grouped, merged and written out.
    :param filepaths: paths of the input files; the first line of each file is skipped
    :param keyset: handed to ``gatherkeys`` to determine the key groups
    :param outfile: target handed to ``importCls.start_file`` / ``forward`` / ``end_file``
    :return: None
    """
    records = []
    for path in filepaths:
        with open(path, "r") as handle:
            handle.readline()
            try:
                # Reading stops when parsing a line raises.
                while True:
                    records.append(util.parseline(handle.readline()))
            except Exception as err:
                print(err)
    # One merged record per key group, in the order gatherkeys yields the groups.
    merged = [
        mergelinerisky(getkeygroup(records, group))
        for group in gatherkeys(records, keyset)
    ]
    importCls.start_file(outfile)
    for record in merged:
        importCls.forward(record, outfile)
    importCls.end_file(outfile)
コード例 #4
0
def importxlsxmerge(infile, outfile, keyset):
    """
    Import the first sheet of an xlsx file and merge it into an existing JSON
    structure at the outfile location.

    Does the same as the other xlsx import, with the difference that it assumes
    there is already a JSON structure at ``outfile``. Check whether a file exists
    at ``outfile`` and then use either this function or importxlsx.

    Every sheet row becomes a record whose fields are ``{'value': ..., 'validated':
    False}`` cells keyed by the stripped column name. Each record already stored in
    ``outfile`` is merged into the first record it aligns with (per
    ``Merge.keyalign``) or appended when none aligns; the file is then rewritten
    with the header values read from its first line.

    :param infile: path of the xlsx file to import
    :param outfile: path of the existing JSON structure, rewritten in place
    :param keyset: handed to ``Merge.keyalign`` to decide whether two records match
    :return: None
    """
    # Open the workbook once and close it deterministically.
    with pandas.ExcelFile(infile) as workbook:
        frame = pandas.read_excel(workbook, sheet_name=workbook.sheet_names[0])
    data = frame.to_dict()
    print('\n' + str(data))
    # Row count taken from the frame itself, so a sheet without columns yields 0.
    length = len(frame.index)
    arr = list()
    for num in range(length):
        obj = dict()
        for column in data.keys():
            # Look the cell up under the original header; only the output key is stripped.
            obj[column.strip()] = {'value': data[column][num], 'validated': False}
        arr.append(obj)
    print(arr)
    prevarr = list()
    with open(outfile) as file:
        header = util.parsefirstline(file.readline())
        try:
            while True:
                prevarr.append(util.parseline(file.readline()))
        except Exception as err:
            # The read loop can only end through an exception; presumably
            # util.parseline rejects the end-of-file line (TODO confirm).
            print(err)
    for previous in prevarr:
        for index, current in enumerate(arr):
            if Merge.keyalign(previous, current, keyset):
                # Store the merge result; it used to be assigned to the loop
                # variable and was lost.
                arr[index] = Merge.mergelinerisky([current, previous])
                break
        else:
            # No imported record aligned with this stored one: keep it as is.
            arr.append(previous)
    start_file(outfile, header['cc'], header['locked'], header['tablename'], header['rules'])
    for x in arr:
        forward(x, outfile)
    end_file(outfile)
コード例 #5
0
def testmemconservingmerge():
    """
    Split generated rows over two files, merge them again with
    ``merge.memconservingmerge`` and check the generated rows against the output.

    All files live in a ``tmp`` directory next to this test file; the directory
    is removed again even when the test body raises.
    """
    # Build every path from the same directory so the test neither depends on the
    # current working directory nor on the Windows path separator.
    tmpdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp")
    data1 = os.path.join(tmpdir, 'data1.jayson')
    data2 = os.path.join(tmpdir, 'data2.jayson')
    merged = os.path.join(tmpdir, 'data.jayson')
    os.makedirs(tmpdir)
    try:
        args = dict()
        args['param'] = json.loads('[{"propname":"firstname","generator":"name"},{"propname":"lastname","generator":"name"},{"propname":"bla","generator":"randchar8"},{"propname":"wtf","generator":"randchar8"},{"propname":"wtf2","generator":"randint8"},{"propname":"wtf3","generator":"randint6"}]')
        args['lines'] = 10
        args['return'] = True
        tmp = generateFile.lel(args)
        # The merge skips the first line of every input file, so write a dummy one.
        with open(data1, "a") as file:
            file.write(',\n')
        with open(data2, "a") as file:
            file.write(',\n')
        for y in tmp:
            obj1 = merge.keysonly(y, ['firstname', 'lastname', 'bla', 'wtf'])
            for x in obj1.keys():
                obj1[x] = {'value': obj1[x]}
            obj2 = merge.keysonly(y, ['firstname', 'lastname', 'wtf2', 'wtf3'])
            for x in obj2.keys():
                obj2[x] = {'value': obj2[x]}
            with open(data1, "a") as file:
                file.write(json.dumps(obj1) + ',\n')
            with open(data2, "a") as file:
                file.write(json.dumps(obj2) + ',\n')
        # Make sure the output file exists before the merge runs.
        open(merged, "a").close()
        merge.memconservingmerge([data1, data2], ['firstname', 'lastname'], merged)
        res = list()
        with open(merged, "r") as file:
            file.readline()
            try:
                while True:
                    obj = util.parseline(file.readline())
                    res.append(obj)
            except Exception as err:
                print('test 299: ' + str(err))
        print('length: ' + str(len(res)) + '; result list: ' + str(res) )
        print('length: ' + str(len(tmp)) + '; source list: ' + str(tmp))
        # NOTE(review): the expected entry wraps the whole row in one
        # {'value', 'validated'} cell - confirm that this is the shape
        # util.parseline returns for the merged output.
        exists = all({'value': x, 'validated': True} in res for x in tmp)
    finally:
        # Always clean up, otherwise the next run fails in os.makedirs.
        shutil.rmtree(tmpdir)
    assert exists