Example #1
File: pre.py Project: jestory/pynab
def parseNzedbirc(unformattedPre):
    PRE_REGEX = regex.compile(
        r'(?P<preType>.+): \[DT: (?P<pretime>.+)\]\[TT: (?P<name>.+)\]\[SC: (?P<source>.+)\]\[CT: (?P<category>.+)\]\[RQ: (?P<request>.+)\]\[SZ: (?P<size>.+)\]\[FL: (?P<files>.+)\]\[FN: (?P<filename>.+)\]')

    try:
        formattedPre = PRE_REGEX.search(unformattedPre).groupdict()
    except Exception as e:
        # Bail out early: the key lookups below assume a successful match
        log.debug("pre: Error parsing nzedbirc - {}".format(e))
        return None

    if formattedPre['preType'] == "NUK":
        formattedPre['nuked'] = True
    else:
        formattedPre['nuked'] = False

    # Split out the request id and group if a request exists
    if formattedPre['request'] != "N/A":
        formattedPre['requestid'] = formattedPre['request'].split(":")[0]
        formattedPre['requestgroup'] = formattedPre['request'].split(":")[1]
    else:
        formattedPre['requestid'] = None
        formattedPre['requestgroup'] = None

    formattedPre['searchname'] = releases.clean_release_name(formattedPre['name'])

    # Remove any columns we don't need. Perhaps there's a way to filter these
    # out via regex, or a way to ignore them via SQLAlchemy?
    formattedPre.pop("preType", None)
    formattedPre.pop("size", None)
    formattedPre.pop("files", None)
    formattedPre.pop("request", None)

    return formattedPre
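For context, a minimal sketch of how this parser might be called. The announce line below is invented for illustration (it follows the no-space bracket format this example's regex expects), and it assumes pre.py already imports regex, log, and releases at module level:

# Hypothetical announce line, for illustration only
line = "PRE: [DT: 2014-10-08 22:59:06][TT: Some.Release.720p-GRP][SC: a.b.teevee][CT: TV][RQ: N/A][SZ: 350MB][FL: 25][FN: some.release.720p-grp]"
pre = parseNzedbirc(line)
if pre is not None:
    print(pre['name'], pre['nuked'], pre['searchname'])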
Example #2
File: pre.py Project: sqw23/pynab
def parseNzedbirc(unformattedPre):
    PRE_REGEX = regex.compile(
        r'(?P<preType>.+): \[DT: (?P<pretime>.+)\] \[TT: (?P<name>.+)\] \[SC: (?P<source>.+)\] \[CT: (?P<category>.+)\] \[RQ: (?P<request>.+)\] \[SZ: (?P<size>.+)\] \[FL: (?P<files>.+)\] \[FN: (?P<filename>.+)\]')

    try:
        formattedPre = PRE_REGEX.search(unformattedPre).groupdict()
    except Exception as e:
        # Bail out early: the key lookups below assume a successful match
        log.debug("pre: Error parsing nzedbirc - {}".format(e))
        return None

    if formattedPre['preType'] == "NUK":
        formattedPre['nuked'] = True
    else:
        formattedPre['nuked'] = False

    # Split out the request id and group if a request exists
    if formattedPre['request'] != "N/A":
        formattedPre['requestid'] = formattedPre['request'].split(":")[0]
        formattedPre['requestgroup'] = formattedPre['request'].split(":")[1]
    else:
        formattedPre['requestid'] = None
        formattedPre['requestgroup'] = None

    formattedPre['searchname'] = releases.clean_release_name(formattedPre['name'])

    # Remove any columns we don't need. Perhaps there's a way to filter these
    # out via regex, or a way to ignore them via SQLAlchemy?
    formattedPre.pop("preType", None)
    formattedPre.pop("size", None)
    formattedPre.pop("files", None)
    formattedPre.pop("request", None)

    return formattedPre
Example #3
File: pre.py Project: gkoh/pynab
def parseNzedbirc(unformattedPre):
    CLEAN_REGEX = regex.compile(r'[\x02\x0F\x16\x1D\x1F]|\x03(\d{,2}(,\d{,2})?)?')
    PRE_REGEX = regex.compile(
        r'(?P<preType>.+): \[DT: (?P<pretime>.+)\] \[TT: (?P<name>.+)\] \[SC: (?P<source>.+)\] \[CT: (?P<category>.+)\] \[RQ: (?P<request>.+)\] \[SZ: (?P<size>.+)\] \[FL: (?P<files>.+)\] \[FN: (?P<filename>.+)\]')

    # Start as None so the guard below also catches a None or unparseable input;
    # an empty dict here would slip past the None check and raise a KeyError
    formattedPre = None

    if unformattedPre is not None:
        try:
            cleanPre = regex.sub(CLEAN_REGEX, '', unformattedPre)
            formattedPre = PRE_REGEX.search(cleanPre).groupdict()
        except Exception as e:
            log.debug("pre: Message prior to error - {}".format(unformattedPre))
            log.debug("pre: Error parsing nzedbirc - {}".format(e))
            formattedPre = None

    if formattedPre is not None:
        if formattedPre['preType'] == "NUK":
            formattedPre['nuked'] = True
        else:
            formattedPre['nuked'] = False

        # Split out the request id and group if a request exists
        if formattedPre['request'] != "N/A":
            formattedPre['requestid'] = formattedPre['request'].split(":")[0]
            formattedPre['requestgroup'] = formattedPre['request'].split(":")[1]
        else:
            formattedPre['requestid'] = None
            formattedPre['requestgroup'] = None

        formattedPre['searchname'] = releases.clean_release_name(formattedPre['name'])

        # Remove any columns we don't need. Perhaps there's a way to filter
        # these out via regex, or a way to ignore them via SQLAlchemy?
        formattedPre.pop("preType", None)
        formattedPre.pop("size", None)
        formattedPre.pop("files", None)
        formattedPre.pop("request", None)

        return formattedPre
    else:
        return None
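The CLEAN_REGEX added in this variant strips mIRC formatting control codes (bold \x02, reset \x0F, reverse \x16, italics \x1D, underline \x1F, and colour \x03 with its optional fg,bg digits) before matching, since IRC announce lines often arrive colour-wrapped. A quick sketch of what that cleanup does, assuming the third-party regex module; the sample fragment is invented:

import regex

CLEAN_REGEX = regex.compile(r'[\x02\x0F\x16\x1D\x1F]|\x03(\d{,2}(,\d{,2})?)?')
raw = '\x0304PRE\x03: \x02[DT: 2014-10-08 22:59:06]\x02'  # colour- and bold-wrapped fragment
print(regex.sub(CLEAN_REGEX, '', raw))  # -> PRE: [DT: 2014-10-08 22:59:06]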
Example #4
def process(precsv, processingFile=None):
    ordering = [
        'name', 'filename', 'nuked', 'category', 'pretime', 'source',
        'requestid', 'requestgroup', 'searchname'
    ]

    # Clean up the file a bit.
    precsv.replace("'", "", inplace=True, regex=True)
    # Collapse the multi-valued nuke status codes down to 0/1
    precsv["nuked"].replace({"2": "0", "3": "1", "4": "1", "5": "1", "69": "0"}, inplace=True)
    precsv.replace(".\\N$", '', inplace=True, regex=True)

    # Sometimes there are duplicates within the table itself; remove them
    precsv.drop_duplicates(subset='name', keep='last', inplace=True)

    # Add a clean searchname column
    precsv['searchname'] = precsv['name'].map(releases.clean_release_name)

    # Drop the pres without request IDs
    precsv = precsv[precsv.requestid != '0']

    # Create a list of names to check if they exist
    names = list(precsv.name)

    # Query for any existing pres; they need to be deleted so COPY doesn't fail
    prenamelist = []
    with db_session() as db:

        if names:
            pres = db.query(Pre).filter(Pre.name.in_(names)).all()

            for pre in pres:
                prenamelist.append(pre.name)

        data = io.StringIO()
        precsv.to_csv(data, index=False, header=False)

        # Delete any pres found as we are essentially going to update them
        if prenamelist:
            for pre in pres:
                db.delete(pre)
            db.commit()
            print("pre-import: Deleted {} pres that will re-inserted".format(
                len(prenamelist)))
        else:
            print(
                "pre-import: File clean, no pres need to be deleted before re-insert"
            )

    try:
        if processingFile is not None:
            print("pre-import: Attempting to add {} to the database".format(
                processingFile['lastfile']))

            data.seek(0)
            copy_file(engine, data, ordering, Pre)

            # Write out the last pre csv name so it can be restarted later without downloading all the pres.
            with open('lastfile.json', 'w') as outfile:
                json.dump({'lastfile': int(processingFile['lastfile'])},
                          outfile)

        else:
            data.seek(0)
            copy_file(engine, data, ordering, Pre)
            data.close()
            print("pre-import: Chunk import successful")

    except Exception as e:
        print("pre-import: Error inserting into database - {}".format(e))

        if processingFile is not None:
            INSERTFAILS.append(processingFile['lastfile'])
        else:
            print("pre-import: Error processing chunk")