def __init__(self, **kw):
    """Set up the unit query: tables, computed columns, joins and styling.

    Every local defined before ``f.set_self(vars())`` is handed to that
    helper as part of the ``vars()`` snapshot, so the local names below are
    significant -- do not rename them or add new ones above that call.
    """
    super().__init__(**kw)
    a = self.select_table

    # Custom SQL functions: ISNUMERIC(val) and LEFT(val, num).
    isNumeric = cfn('ISNUMERIC', ['val'])
    left = cfn('LEFT', ['val', 'num'])
    c, d = pk.Tables('UnitSMR', 'EquipType')

    # Whole days elapsed between DeliveryDate and the current timestamp.
    days = fn.DateDiff(PseudoColumn('day'), a.DeliveryDate, fn.CurTimestamp())

    # Days left in a 365-day window counted from delivery; 0 once elapsed.
    remaining = (
        Case()
        .when(days <= 365, 365 - days)
        .else_(0)
        .as_('Remaining'))

    # Same idea over a two-year (365 * 2 days) window.
    remaining2 = (
        Case()
        .when(days <= 365 * 2, 365 * 2 - days)
        .else_(0))

    # The two-year value is used only when the first character of Model is
    # numeric; every other row gets NULL.
    ge_remaining = (
        Case()
        .when(isNumeric(left(a.Model, 1)) == 1, remaining2)
        .else_(None)
        .as_('GE_Remaining'))

    # Sub-query: max SMR and max SMR date per unit, aliased as 'b'.
    b = (
        c.select(
            c.Unit,
            fn.Max(c.SMR).as_('CurrentSMR'),
            fn.Max(c.DateSMR).as_('DateSMR'))
        .groupby(c.Unit)
        .as_('b'))

    cols = [
        a.MineSite,
        a.Customer,
        d.EquipClass,
        a.Model,
        a.Serial,
        a.Unit,
        b.CurrentSMR,
        b.DateSMR,
        a.DeliveryDate,
        remaining,
        ge_remaining]

    q = (
        Query.from_(a)
        .left_join(b).on_field('Unit')
        .left_join(d).on_field('Model')
        .orderby(a.MineSite, a.Model, a.Unit))

    # Passes all locals defined above (a, cols, q, ...) to f.set_self.
    f.set_self(vars())

    # NOTE lots of duplication with this pattern between avail / ac inspect /
    # units / comp co; the original author could not remember how it all
    # works and did not want to dig into it.
    self.stylemap_cols |= {
        'Model': {
            'cols': ['Model'],
            'func': st.pipe_highlight_alternating,
            'da': {
                'subset': ['Model'],
                'color': 'maroon',
                'theme': self.theme}}}
def loadAndCheckFile():
    """Handle an upload: store the file, register it and run the comparison.

    A single source file (accepted by ``allowed_file``) is saved, registered
    with ``addOneFile`` and the response of ``trueAlgo`` is returned.

    An archive (accepted by ``allowed_archive``) is saved, extracted into a
    temporary directory and registered with ``addManyFiles``; every entry is
    then checked with ``trueAlgo`` in list mode, the combined result is stored
    in the ``SearchResult`` table and returned as JSON.

    Anything else -- a non-POST request, an empty upload, a disallowed file
    type, or a name that sanitises to an empty string -- yields
    ``{"error": "failed"}``.
    """
    # A view must always return a response; previously a non-POST request
    # fell through and returned None.
    if request.method != 'POST':
        return jsonify({"error": "failed"})

    file = request.files['file']
    if not file:
        return jsonify({"error": "failed"})

    isSingleFile = allowed_file(file.filename)
    if not isSingleFile and not allowed_archive(file.filename):
        return jsonify({"error": "failed"})

    filename = secure_filename(file.filename)
    if not filename:
        # secure_filename may strip a name down to nothing; saving would then
        # target the upload directory itself.
        return jsonify({"error": "failed"})

    uploadPath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(uploadPath)

    if isSingleFile:
        fileId = addOneFile(app.config['UPLOAD_FOLDER'], filename)[1]
        # TODO: delete the file after all operations?
        return trueAlgo(fileId)

    # Archive upload. (Original note on the temporary directory: errors are
    # ignored.)
    with temporary_directory() as tmp:
        path = os.path.join(os.getcwd(), tmp)
        # NOTE(review): the archive comes from the client and is extracted
        # without inspecting member count or size -- consider adding limits.
        with zipfile.ZipFile(uploadPath) as zf:
            zf.extractall(path)
        info = addManyFiles(path, filename)

        # Load the metaphone data once and share it across all checks.
        allMetaphones = getAllMetaphones()
        results = []
        for val in info:
            print(val)
            results.append(trueAlgo(val[1], True, allMetaphones))

        jsonResult = json.dumps(results)
        q = Query.into(db.tables["SearchResult"]).columns(
            "result", "createdAt").insert(jsonResult, functions.CurTimestamp())
        executeQ(q)
        return jsonify(results)
def test_current_timestamp_with_alias(self):
    """CurTimestamp built with an alias renders the quoted alias after it."""
    expected_sql = 'SELECT CURRENT_TIMESTAMP "ts"'
    aliased_now = fn.CurTimestamp('ts')

    rendered_sql = str(Query.select(aliased_now))

    self.assertEqual(expected_sql, rendered_sql)
def test_current_timestamp(self):
    """CurTimestamp without arguments renders as bare CURRENT_TIMESTAMP."""
    expected_sql = "SELECT CURRENT_TIMESTAMP"
    now_term = fn.CurTimestamp()

    rendered_sql = str(Query.select(now_term))

    self.assertEqual(expected_sql, rendered_sql)
def test_current_timestamp_with_alias(self):
    """Selecting CurTimestamp("ts") yields CURRENT_TIMESTAMP aliased as "ts"."""
    select_query = Query.select(fn.CurTimestamp("ts"))
    actual_sql = str(select_query)
    self.assertEqual("SELECT CURRENT_TIMESTAMP \"ts\"", actual_sql)
def _sameFileBelowTempDir(storedPath, newPath):
    """Tell whether two "Local/Temp" paths match below the temp sub-directory.

    Both paths must contain ``os.path.join("Local", "Temp")`` and have the
    same number of components. The component right after the first "Temp"
    component of ``storedPath`` is skipped; all later components are compared
    position by position. If ``storedPath`` has no component that is exactly
    "Temp", the paths are treated as matching.
    """
    marker = os.path.join("Local", "Temp")
    if marker not in storedPath or marker not in newPath:
        return False

    storedParts = storedPath.split(os.sep)
    newParts = newPath.split(os.sep)
    if len(storedParts) != len(newParts):
        return False

    try:
        tempIndex = storedParts.index("Temp")
    except ValueError:
        return True
    # +2: skip "Temp" itself and the directory directly beneath it.
    return storedParts[tempIndex + 2:] == newParts[tempIndex + 2:]


def addOneFile(dir, fileName, entryName="", id=0):
    """Register one file in the database and store its code fragments.

    The file is read as UTF-8 (undecodable bytes replaced), passed through
    ``prettierCode``, stripped of tab characters and hashed with SHA-256.
    If a ``File`` row with the same hash already refers to the same path (or
    to an equivalent path under a "Local/Temp" directory) nothing is inserted.
    Otherwise an ``Entry`` row is created when ``id`` is 0, a ``File`` row is
    inserted, and every fragment produced by ``shorterString`` is written to
    ``CodeFragment`` together with its metaphone.

    The working directory is switched to ``dir`` for the duration of the call
    and is always restored afterwards, including on the early duplicate
    returns and when an exception is raised.

    Args:
        dir: Directory containing the file.
        fileName: Name of the file inside ``dir``.
        entryName: Name for a newly created entry; defaults to ``fileName``.
        id: Existing entry id to attach the file to; 0 creates a new entry.

    Returns:
        Tuple ``(entryId, fileId, isDuplicate)``.
    """
    cwd = os.getcwd()
    os.chdir(dir)
    try:
        if len(entryName) == 0:
            entryName = fileName
        fullPath = os.path.join(dir, fileName)

        with open(fullPath, encoding='utf-8', errors="replace") as f:
            code = f.read()
        code = prettierCode(code, fileName)
        code = code.replace("\t", "")
        codeInBytes = str.encode(code, encoding='utf-8', errors="replace")
        fileHash = hashlib.sha256(codeInBytes).hexdigest()

        # Rows are (id, path, entryId) of files with identical content hash.
        q = Query.from_(db.tables["File"]).select(
            "id", "path", "entryId").where(db.tables["File"].hash == fileHash)
        checkDuplicate = executeQ(q, True)
        for row in checkDuplicate:
            # NOTE(review): both duplicate branches report the FIRST row with
            # this hash, not the row whose path matched -- confirm intended.
            if row[1] == fullPath:
                print("Дубликат!!!", checkDuplicate[0][0], row[1], fullPath)
                return (checkDuplicate[0][2], checkDuplicate[0][0], True)
            if _sameFileBelowTempDir(row[1], fullPath):
                print("Дубликат!", checkDuplicate[0][0], row[1], fullPath)
                return (checkDuplicate[0][2], checkDuplicate[0][0], True)

        if id == 0:
            q = Query.into(db.tables["Entry"]).columns(
                'name', 'createdAt').insert(entryName,
                                            functions.CurTimestamp())
            executeQ(q)
            # The new entry id is taken to be the highest id in the table.
            q = Query.from_(db.tables["Entry"]).select('id').orderby(
                'id', order=Order.desc).limit(1)
            id = getId(executeQ(q, True))

        q = Query.into(db.tables["File"]).columns(
            "entryId", "path", "hash").insert(id, fullPath, fileHash)
        executeQ(q)
        q = Query.from_(db.tables["File"]).select('id').orderby(
            'id', order=Order.desc).limit(1)
        fileId = getId(executeQ(q, True))

        # Split every line into the byte chunks returned by shorterString and
        # decode them back to text.
        splittedCode = []
        for string in code.split("\n"):
            stringInBytes = str.encode(
                string, encoding='utf-8', errors="replace")
            for val in shorterString(stringInBytes):
                splittedCode.append(
                    bytes.decode(val, encoding='utf-8', errors="replace"))

        for order, fragment in enumerate(splittedCode):
            q = Query.into(db.tables["CodeFragment"]).columns(
                "fileId", "order", "text", "metaphone").insert(
                    fileId, order, fragment,
                    db.func["metaphone"](fragment, 255))
            executeQ(q)

        return (id, fileId, False)
    finally:
        os.chdir(cwd)
def trueAlgo(fileId, needList=False, allMetaphones=None):
    """Compare every line of one file against all other files of its type.

    For each non-empty metaphone of the file, candidate lines from the other
    files with the same extension are pre-filtered (length difference of at
    most 3, same first two characters) and then scored with the database
    function ``levenshtein_less_equal``. A distance of 0 or 1 marks the line
    as "plagiarism", any other distance within the threshold as "similar",
    no hit as "unique"; lines with an empty metaphone are "skipped".

    Args:
        fileId: Id of the file to check (anything ``int()`` accepts).
        needList: When true, return the raw result list instead of storing it
            in ``SearchResult`` and returning a JSON response.
        allMetaphones: Optional pre-loaded sequence
            ``(metaphones, texts, files, extensions)``, each indexable by
            file id; loaded with ``getAllMetaphones()`` when falsy.

    Returns:
        With ``needList`` the list ``[lines, best matching lines, source file
        of each match, combo lengths, distances, verdicts, score percentage,
        name of the checked file]``; otherwise ``jsonify`` of that list.
    """
    fileId = int(fileId)
    if not allMetaphones:
        metaphones, texts, files, extensions = getAllMetaphones()
    else:
        metaphones = allMetaphones[0]
        texts = allMetaphones[1]
        files = allMetaphones[2]
        extensions = allMetaphones[3]

    currentExtension = extensions[fileId]
    fileMetaphones = metaphones[fileId]

    distances = []
    stringsFile = []
    stringsRelevant = []
    stringsFrom = []
    result = []
    combo = []
    lastFile = -1
    comboCounter = 1
    start_time = time.time()

    # Candidates: every other file with the same extension, original order.
    allKeys = [
        k for k in metaphones
        if k != fileId and extensions[k] == currentExtension
    ]

    for lineNo, val in enumerate(fileMetaphones):
        stringsFile.append(texts[fileId][lineNo])
        stringsRelevant.append("_empty_")
        if val == "":
            result.append("skipped")
            distances.append(255)
            stringsFrom.append("")
            combo.append(0)
            continue

        minD = 255
        result.append("unique")
        curFile = -1
        for k in allKeys:
            for counter, val2 in enumerate(metaphones[k]):
                # Cheap pre-filters before the database round trip.
                if val2 == "":
                    continue
                if abs(len(val2) - len(val)) > 3:
                    continue
                if (len(val) > 1 and len(val2) > 1
                        and (val2[0] != val[0] or val2[1] != val[1])):
                    continue

                threshold = min(len(val), len(val2), 7) // 2 + 1
                q = Query.select(db.func["levenshtein_less_equal"](
                    str(val), val2, threshold))
                rows = executeQ(q, True)
                for row in rows:
                    # levenshtein_less_equal is only documented to return
                    # "some value greater than max_d" when the distance
                    # exceeds the bound, so compare against the bound itself
                    # rather than against exactly threshold + 1.
                    if row[0] <= threshold and row[0] < minD:
                        minD = row[0]
                        curFile = k
                        stringsRelevant[-1] = texts[k][counter]
                        if row[0] == 0 or row[0] == 1:
                            result[-1] = "plagiarism"
                        else:
                            result[-1] = "similar"
                # A (near-)exact hit cannot be improved on: stop scanning.
                if minD == 0 or minD == 1:
                    break
            if minD == 0 or minD == 1:
                break

        distances.append(minD)
        stringsFrom.append(files[curFile] if curFile != -1 else "")
        if minD == 0 or minD == 1:
            # Try the file that just matched first for the next line.
            allKeys.remove(curFile)
            allKeys.insert(0, curFile)

        # Length of the current run of consecutive lines from the same file.
        if lastFile == -1:
            combo.append(1)
        else:
            if lastFile == curFile:
                comboCounter += 1
            else:
                comboCounter = 1
            combo.append(comboCounter)
        lastFile = curFile

    # Backward pass: tally the verdicts and, walking back from the end of
    # each run, increase the running counters of the preceding lines of that
    # run (the last line keeps its own count).
    coincidences = 0
    empty = 0
    currentCombo = 0
    comboToAdd = 0
    for i in reversed(range(len(stringsFile))):
        if result[i] == "plagiarism":
            coincidences += 1
        elif result[i] == "similar":
            coincidences += 0.1
        elif result[i] == "skipped":
            empty += 1

        if currentCombo > 0:
            if combo[i] == 0:
                continue
            combo[i] += comboToAdd
            currentCombo -= 1
            comboToAdd += 1
        elif combo[i] > 1:
            currentCombo = combo[i] - 1
            comboToAdd = 1

    # Skipped lines do not count; avoid dividing by zero for empty files.
    divisor = (len(stringsFile) - empty) or 1
    percent = round(coincidences / divisor * 100, 1)
    print("RESULT: ", percent)

    fullResult = [
        stringsFile, stringsRelevant, stringsFrom, combo, distances, result,
        percent, files[fileId]
    ]
    print("--- %s seconds ---" % (time.time() - start_time))

    if needList:
        return fullResult

    jsonResult = json.dumps(fullResult)
    q = Query.into(db.tables["SearchResult"]).columns(
        "result", "createdAt").insert(jsonResult, functions.CurTimestamp())
    executeQ(q)
    return jsonify(fullResult)