def getFileId(filepath, matchedFilesdict):
    """Find the entry of *matchedFilesdict* that corresponds to *filepath*.

    The last backslash-separated component of *filepath* is compared with
    every value of *matchedFilesdict* after both have been passed through
    ``delPunctuationMarks`` (presumably a punctuation-insensitive
    normalisation -- confirm against ``fup.utils.commun``).

    Args:
        filepath: Path string. Only ``'\\'`` is treated as a separator, so a
            path written with forward slashes is compared as a whole.
        matchedFilesdict: Mapping of file id -> file name.

    Returns:
        ``(kid, vfname)`` for the first matching entry in iteration order,
        or ``None`` when no entry matches. Callers that unpack the result
        must handle the ``None`` case.
    """
    from fup.utils.commun import delPunctuationMarks

    file_name = filepath.split('\\')[-1]
    # Normalise the searched name once instead of on every iteration.
    normalized_name = delPunctuationMarks(file_name)

    for kid, vfname in matchedFilesdict.items():
        if delPunctuationMarks(vfname) == normalized_name:
            return kid, vfname

    return None
def checkFileInfo(fileinfo):
    """Check whether the file described by *fileinfo* is already in history.

    The ``fileshistory`` table is loaded with ``sql2df`` and joined with
    *fileinfo* on the ``FileName`` column.

    Args:
        fileinfo: Mapping of column name -> scalar value describing one
            file. It must contain a ``'FileName'`` key.

    Returns:
        * ``True`` when no history row has the same ``FileName``.
        * A message string naming the batch id and file id of the first
          history row whose normalised name matches, when the file name is
          already present. ``delDirsnotindb()`` is called before the
          message is returned.
        * A message string wrapping the exception text if anything fails
          while the history row is being looked up.
        * ``None`` if the name was found by the join but no history row
          matched during the scan (not expected in practice).

    NOTE(review): earlier revisions also joined on ``FileSizeBytes`` and
    ``ModificationDate``, but those results were only consulted in branches
    that could never execute (each one repeated the "no name match"
    condition that had already returned). Only the name decides the
    outcome, so the unused joins were removed. Confirm whether size and
    modification date were meant to take part in the decision.
    """
    import pandas as pd
    from fup.utils.dbwrap import sql2df
    from fup.helpers.files import delDirsnotindb
    from fup.utils.commun import delPunctuationMarks

    histdf = sql2df('fileshistory')

    # One-row frame: every value is wrapped in a list so that scalar
    # values become a single row.
    filedf = pd.DataFrame.from_dict({k: [v] for k, v in fileinfo.items()})

    merged_name = filedf.merge(histdf, on='FileName', suffixes=('', '_y'))

    if merged_name.shape[0] == 0:
        return True

    try:
        filename_merge = merged_name['FileName'].tolist()[0]
        # Normalise the matched name once instead of on every iteration.
        normalized_merge = delPunctuationMarks(filename_merge)

        for fname in histdf['FileName']:
            if delPunctuationMarks(fname) != normalized_merge:
                continue

            histdf_filtered = histdf[histdf['FileName'] == fname]
            filename_hist = histdf_filtered['FileName'].tolist()
            batchid_hist = histdf_filtered['AddedInBatch'].tolist()
            fileid_hist = histdf_filtered['FileID'].tolist()

            delDirsnotindb()

            response = "File '{}' was probably added before! \nCheck BID_{}, FID_{}".format(
                filename_hist[0], batchid_hist[0], fileid_hist[0])
            return response
    except Exception as e:
        # Deliberately broad: callers receive a message string rather than
        # an exception.
        return str(
            "Probably files in NEW are already inserted. Got: {}".format(e))

    return None