def memconservingmerge(filepaths, keyset, outfile):
    """
    Collects every distinct keyset combination from all files, then merges the
    matching entries into one output file. For this function memory usage was a
    more important criterion than speed, so it doesn't load all files into
    memory at the same time; instead each file is read again for every key
    combination to guarantee the intended results.
    :param filepaths: list of input file paths
    :param keyset: list of property names used to match entries
    :param outfile: path of the merged output file
    :return: None
    """
    keys = list()
    # First pass: collect every distinct key combination across all files.
    for filepath in filepaths:
        with open(filepath, "r") as file:
            file.readline()  # skip the header line
            try:
                while True:
                    obj = util.parseline(file.readline())
                    # flatten {'value': ...} wrappers to plain values
                    for x in obj.keys():
                        obj[x] = obj[x]['value']
                    obj2 = keysonly(obj, keyset)
                    if obj2 not in keys:
                        keys.append(obj2)
            except Exception as err:
                # parseline raises once the file is exhausted
                print(err)
    print('Final Keys: ' + str(keys) + '; length: ' + str(len(keys)))
    importCls.start_file(outfile)
    # Second pass: for each key combination, re-read every file and merge
    # all entries whose keyset values match.
    for k in keys:
        tempobj = k
        for filepath in filepaths:
            with open(filepath, "r") as file:
                file.readline()  # skip the header line
                try:
                    while True:
                        obj = util.parseline(file.readline())
                        for x in obj.keys():
                            obj[x] = obj[x]['value']
                        if keyalign(k, obj, keyset):
                            tempobj = mergelinerisky([tempobj, obj])
                except Exception:
                    pass
        print('outobj: ' + str(tempobj))
        try:
            # Re-wrap the plain values into the {'value': ..., 'validated': ...}
            # structure expected by the import module.
            x = dict()
            for y in tempobj.keys():
                x[y] = dict()
                x[y]['value'] = tempobj[y]
                x[y]['validated'] = True
            importCls.forward(x, outfile)
        except Exception:
            pass
    importCls.end_file(outfile)
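
# Usage sketch (illustrative only; the file paths below are hypothetical and
# assume both inputs share the 'firstname'/'lastname' columns):
#
#   memconservingmerge(
#       ['data/people_a.jayson', 'data/people_b.jayson'],
#       ['firstname', 'lastname'],
#       'data/people_merged.jayson',
#   )
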
def exportcsvfromfile(source, outfile):
    """
    Exports the given source, a JSON file created by the import (expect errors
    if it isn't one), as a CSV file. The first row contains the column names,
    all following rows contain the values.
    :param source: json file created by import
    :param outfile: location of the CSV output file
    :return: None
    """
    delimiter = ';'
    with open('../resources/outconfig.json') as file:
        try:
            conf = json.loads(file.read())
            delimiter = conf['delimiter']
        except Exception as err:
            # fall back to the default delimiter if the config is unreadable
            print(err)
    firstset = True
    print("Exporting as CSV")
    with open(source, 'r') as sauce:
        # newline='' keeps the csv module from inserting blank rows on Windows
        with open(outfile, "w", newline='') as file:
            w = csv.writer(file, delimiter=delimiter)
            sauce.readline()  # skip the header line
            try:
                while True:
                    x = util.parseline(sauce.readline())
                    if firstset:
                        # write the column names once, before the first data row
                        w.writerow(x.keys())
                        firstset = False
                    arr = []
                    for y in x.values():
                        arr.append(y['value'])
                    print("Exporting line " + json.dumps(arr))
                    w.writerow(arr)
            except Exception as err:
                # parseline raises once the source file is exhausted
                print(err)
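
# Shape of ../resources/outconfig.json as assumed from the lookup above; only
# the 'delimiter' key is read, everything else is ignored by this function:
#
#   {"delimiter": ";"}
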
def fullmergefiles(filepaths, keyset, outfile):
    """
    Counterpart to memconservingmerge. Ignores memory usage entirely but is
    significantly faster, since it reads each file only once and keeps all
    values in RAM.
    :param filepaths: list of input file paths
    :param keyset: list of property names used to match entries
    :param outfile: path of the merged output file
    :return: None
    """
    objects = list()
    # Single pass: load every entry from every file into memory.
    for filepath in filepaths:
        with open(filepath, "r") as file:
            file.readline()  # skip the header line
            try:
                while True:
                    objects.append(util.parseline(file.readline()))
            except Exception as err:
                # parseline raises once the file is exhausted
                print(err)
    tmp = gatherkeys(objects, keyset)
    out = []
    for x in tmp:
        # merge all entries that share this key combination
        tmp2 = getkeygroup(objects, x)
        out.append(mergelinerisky(tmp2))
    importCls.start_file(outfile)
    for x in out:
        importCls.forward(x, outfile)
    importCls.end_file(outfile)
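
# Usage sketch (illustrative; same hypothetical files as above). This variant
# is preferable when all inputs comfortably fit in RAM, memconservingmerge
# otherwise:
#
#   fullmergefiles(
#       ['data/people_a.jayson', 'data/people_b.jayson'],
#       ['firstname', 'lastname'],
#       'data/people_merged.jayson',
#   )
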
def importxlsxmerge(infile, outfile, keyset):
    """
    Does the same as the other xlsx import, with the slight difference that it
    assumes a JSON structure already exists at the outfile location. I
    recommend checking whether a file exists at the "outfile" location and
    then using either this method or importxlsx.
    :param infile: xlsx file to import
    :param outfile: existing JSON file to merge into
    :param keyset: list of property names used to match entries
    :return: None
    """
    newfile = pandas.ExcelFile(infile)
    file = pandas.read_excel(open(infile, 'rb'), sheet_name=newfile.sheet_names[0])
    data = file.to_dict()
    print('\n' + str(data))
    keys = data.keys()
    # determine the number of rows from the first column
    length = 0
    for x in keys:
        length = len(data[x].keys())
        break
    # print(length)
    # print(keys)
    arr = list()
    # Rebuild the sheet rows as dictionaries in the internal format.
    for num in range(0, length):
        obj = dict()
        for y in keys:
            z = y.strip()  # column names may carry stray whitespace
            obj[z] = dict()
            obj[z]['value'] = data[y][num]
            obj[z]['validated'] = False
        arr.append(obj)
    print(arr)
    # Read the entries already present at the outfile location.
    prevarr = list()
    with open(outfile) as file:
        string = file.readline()
        obj = util.parsefirstline(string)
        try:
            while True:
                prevarr.append(util.parseline(file.readline()))
        except Exception as err:
            # parseline raises once the file is exhausted
            print(err)
    # Merge existing entries into the imported rows where the keysets align;
    # entries without a matching row are appended unchanged.
    for x in prevarr:
        align = False
        for i, y in enumerate(arr):
            if Merge.keyalign(x, y, keyset):
                align = True
                arr[i] = Merge.mergelinerisky([y, x])
                break
        if not align:
            arr.append(x)
    start_file(outfile, obj['cc'], obj['locked'], obj['tablename'], obj['rules'])
    for x in arr:
        forward(x, outfile)
    end_file(outfile)
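
# The first line of the outfile must be a header that util.parsefirstline
# turns into a dict containing at least the keys 'cc', 'locked', 'tablename'
# and 'rules' (inferred from the start_file call above). The concrete values
# here are hypothetical:
#
#   {"cc": 0, "locked": false, "tablename": "people", "rules": []}
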
def testmemconservingmerge():
    # The test assumes the working directory is the test directory, since the
    # data files are opened via the relative 'tmp/...' paths below.
    os.makedirs(os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp"))
    args = dict()
    args['param'] = json.loads(
        '[{"propname":"firstname","generator":"name"},'
        '{"propname":"lastname","generator":"name"},'
        '{"propname":"bla","generator":"randchar8"},'
        '{"propname":"wtf","generator":"randchar8"},'
        '{"propname":"wtf2","generator":"randint8"},'
        '{"propname":"wtf3","generator":"randint6"}]')
    args['lines'] = 10
    args['return'] = True
    tmp = generateFile.lel(args)
    # Write a dummy header line; the merge skips the first line of each file.
    with open('tmp/data1.jayson', "a") as file:
        file.write(',\n')
    with open('tmp/data2.jayson', "a") as file:
        file.write(',\n')
    # Split each generated entry across the two files so that they only share
    # the 'firstname'/'lastname' keys used for merging.
    for y in tmp:
        obj1 = merge.keysonly(y, ['firstname', 'lastname', 'bla', 'wtf'])
        for x in obj1.keys():
            tmp1 = obj1[x]
            obj1[x] = dict()
            obj1[x]['value'] = tmp1
        obj2 = merge.keysonly(y, ['firstname', 'lastname', 'wtf2', 'wtf3'])
        for x in obj2.keys():
            tmp1 = obj2[x]
            obj2[x] = dict()
            obj2[x]['value'] = tmp1
        with open('tmp/data1.jayson', "a") as file:
            file.write(json.dumps(obj1) + ',\n')
        with open('tmp/data2.jayson', "a") as file:
            file.write(json.dumps(obj2) + ',\n')
    open('tmp/data.jayson', "a").close()
    merge.memconservingmerge(['tmp/data1.jayson', 'tmp/data2.jayson'],
                             ['firstname', 'lastname'], 'tmp/data.jayson')
    # Read the merged result back in.
    res = list()
    with open('tmp/data.jayson', "r") as file:
        file.readline()  # skip the header line
        try:
            while True:
                obj = util.parseline(file.readline())
                res.append(obj)
        except Exception as err:
            print('test 299: ' + str(err))
    print('length: ' + str(len(res)) + '; result list: ' + str(res))
    print('length: ' + str(len(tmp)) + '; source list: ' + str(tmp))
    # Every generated entry must reappear in the result, fully merged and with
    # each property marked as validated.
    exists = True
    for x in tmp:
        y = dict()
        for k in x.keys():
            y[k] = dict()
            y[k]['value'] = x[k]
            y[k]['validated'] = True
        if y not in res:
            exists = False
    shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp"))
    assert exists
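
# Shape of the intermediate '.jayson' files, as inferred from the writes in
# this test: a dummy first line (skipped by every reader), then one JSON
# object per line with a trailing comma, each property wrapped in a
# {'value': ...} dict. The names below are made up:
#
#   ,
#   {"firstname": {"value": "Ada"}, "lastname": {"value": "Lovelace"}, ...},
#   {"firstname": {"value": "Alan"}, "lastname": {"value": "Turing"}, ...},
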