예제 #1
0
def getFileId(filepath, matchedFilesdict):
    """Find the entry of *matchedFilesdict* whose file name matches *filepath*.

    The file name is the text after the last backslash in *filepath*
    (only ``\\`` is treated as a separator). Names are compared after both
    have been passed through ``delPunctuationMarks``; that helper lives in
    ``fup.utils.commun`` and is presumably a punctuation-stripping
    normaliser -- confirm against its definition.

    Args:
        filepath: Backslash-separated path (or bare file name) as a string.
        matchedFilesdict: Mapping of id -> file name to search.

    Returns:
        A ``(kid, vfname)`` tuple for the first matching entry in dict
        iteration order, or ``None`` when nothing matches.
    """
    from fup.utils.commun import delPunctuationMarks

    file_name = filepath.split('\\')[-1]
    # Normalise the searched name once instead of once per dict entry.
    normalized_target = delPunctuationMarks(file_name)

    for kid, vfname in matchedFilesdict.items():
        if delPunctuationMarks(vfname) == normalized_target:
            return kid, vfname

    # No entry matched; callers get None.
    return None
예제 #2
0
def checkFileInfo(fileinfo):
    """Check whether a file with the same name is already in the history.

    The history is obtained with ``sql2df('fileshistory')`` and joined with
    *fileinfo* on the ``FileName`` column. Only the file name decides the
    outcome.

    Args:
        fileinfo: Mapping of column name -> scalar value describing one
            file. It must contain the key ``'FileName'``.

    Returns:
        ``True`` when no history row has exactly the same ``FileName``.
        Otherwise a message string naming the first history row whose
        punctuation-normalised name matches, together with its
        ``AddedInBatch`` and ``FileID`` values. If anything raises while
        building that message, a string describing the exception is
        returned instead. ``None`` is returned if no normalised match is
        found.

    Side effects:
        Calls ``delDirsnotindb()`` right before returning the "added
        before" message. That helper is defined in ``fup.helpers.files``;
        its effect is not visible here.
    """
    import pandas as pd
    from fup.utils.dbwrap import sql2df
    from fup.helpers.files import delDirsnotindb
    from fup.utils.commun import delPunctuationMarks

    histdf = sql2df('fileshistory')

    # Wrap every value in a list so pandas builds a single-row frame.
    filedf = pd.DataFrame.from_dict({k: [v] for k, v in fileinfo.items()})

    # Inner join on the name only. Restricting both sides to the key column
    # avoids overlapping-column suffix handling, which the result never used.
    merged_name = filedf[['FileName']].merge(histdf[['FileName']],
                                             on='FileName')

    if merged_name.shape[0] == 0:
        # No exact name match in the history.
        return True

    try:
        filename_merge = merged_name['FileName'].tolist()[0]
        # Normalise the incoming name once, not once per history row.
        normalized_target = delPunctuationMarks(filename_merge)

        for fname in histdf['FileName']:
            if delPunctuationMarks(fname) != normalized_target:
                continue

            histdf_filtered = histdf[histdf['FileName'] == fname]

            filename_hist = histdf_filtered['FileName'].tolist()
            batchid_hist = histdf_filtered['AddedInBatch'].tolist()
            fileid_hist = histdf_filtered['FileID'].tolist()

            delDirsnotindb()

            return "File '{}' was probably added before! Check BID_{}, FID_{}".format(
                filename_hist[0], batchid_hist[0], fileid_hist[0])
    except Exception as e:
        # Deliberate best-effort: callers receive the failure as a message
        # string rather than an exception.
        return "Probably files in NEW are already inserted. Got: {}".format(e)

    return None