Example #1
def search(data):
    """
    Search TMDB for an id based on a name/year.

    :param data: {name, year}
    :return: id
    """

    name = data['name']
    year = data['year']

    # if we managed to parse the year from the name
    # include it, since it'll narrow results
    if year:
        year_query = '&y={}'.format(year.replace('(', '').replace(')', ''))
    else:
        year_query = ''

    try:
        result = requests.get(TMDB_SEARCH_URL + name + year_query).json()
    except Exception:
        log.critical('There was a problem accessing the TMDB API.')
        return None

    if 'results' in result:
        for movie in result['results']:
            ratio = difflib.SequenceMatcher(
                None, pynab.ids.clean_name(name),
                pynab.ids.clean_name(movie['title'])).ratio()
            if ratio > 0.8 and (not year or year in movie['release_date']):
                temp = requests.get(
                    'https://api.themoviedb.org/3/movie/{}?api_key={}'.format(
                        movie['id'], API_KEY)).json()
                return temp['imdb_id']
    return None
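TMDB_SEARCH_URL and API_KEY are module-level constants that the snippet assumes; they are presumably built from pynab's configuration. A minimal usage sketch with placeholder values (the URL layout below is an assumption, not the project's actual constant):

# placeholders only -- in pynab these would come from configuration, not literals
API_KEY = '<tmdb-api-key>'
TMDB_SEARCH_URL = ('https://api.themoviedb.org/3/search/movie'
                   '?api_key=' + API_KEY + '&query=')  # assumed shape

imdb_id = search({'name': 'Blade Runner', 'year': '1982'})
# -> an IMDB id string such as 'tt0083658' if a confident match is found, otherwise None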
Example #2
def search(data):
    """
    Search OMDB for an id based on a name/year.

    :param data: {name, year}
    :return: id
    """

    name = data['name']
    year = data['year']

    # if we managed to parse the year from the name
    # include it, since it'll narrow results
    if year:
        year_query = '&y={}'.format(year.replace('(', '').replace(')', ''))
    else:
        year_query = ''

    try:
        result = requests.get(OMDB_SEARCH_URL + name + year_query).json()
    except Exception:
        log.critical('There was a problem accessing the OMDB API.')
        return None

    if 'Search' in result:
        for movie in result['Search']:
            # doublecheck, but the api should've searched properly
            ratio = difflib.SequenceMatcher(None, pynab.ids.clean_name(name), pynab.ids.clean_name(movie['Title'])).ratio()
            if ratio > 0.8 and year == movie['Year'] and movie['Type'] == 'movie':
                return movie['imdbID']

    return None
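For reference, the OMDB search endpoint returns JSON roughly shaped like the dict below, which is what the loop above walks (abridged):

# abridged OMDB search response, as a Python dict after .json()
{
    'Search': [
        {'Title': 'Blade Runner', 'Year': '1982', 'imdbID': 'tt0083658', 'Type': 'movie'},
    ],
    'totalResults': '1',
    'Response': 'True',
}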
Example #3
def search(data):
    """
    Search OMDB for an id based on a name/year.

    :param data: {name, year}
    :return: id
    """

    name = data['name']
    year = data['year']

    # if we managed to parse the year from the name
    # include it, since it'll narrow results
    if year:
        year_query = '&y={}'.format(year.replace('(', '').replace(')', ''))
    else:
        year_query = ''

    try:
        result = requests.get(OMDB_SEARCH_URL + name + year_query).json()
    except Exception:
        log.critical('There was a problem accessing the OMDB API.')
        return None

    if 'Search' in result:
        for movie in result['Search']:
            # doublecheck, but the api should've searched properly
            ratio = difflib.SequenceMatcher(
                None, pynab.ids.clean_name(name),
                pynab.ids.clean_name(movie['Title'])).ratio()
            if ratio > 0.8 and year == movie['Year'] and movie['Type'] == 'movie':
                return movie['imdbID']

    return None
Example #4
def search(name, year):
    """Search OMDB for a movie and return the IMDB ID."""

    # if we managed to parse the year from the name
    # include it, since it'll narrow results
    if year:
        year_query = '&y={}'.format(year.replace('(', '').replace(')', ''))
    else:
        year_query = ''

    data = {}

    try:
        r = requests.get(OMDB_SEARCH_URL + name + year_query)
        data = r.json()
    except Exception:
        log.critical('There was a problem accessing the OMDB API.')
        return None

    if 'Search' in data:
        for movie in data['Search']:
            # doublecheck, but the api should've searched properly
            ratio = difflib.SequenceMatcher(None, clean_name(name), clean_name(movie['Title'])).ratio()
            if ratio > 0.8 and year == movie['Year'] and movie['Type'] == 'movie':
                return movie
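Unlike the earlier variants, this version returns the whole OMDB search hit rather than just the id (and implicitly returns None when nothing matches); a short usage sketch:

movie = search('Blade Runner', '1982')
if movie:
    imdb_id = movie['imdbID']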
Example #5
def search_lxml(show, content):
    """Search TVRage online API for show data."""
    try:
        tree = etree.fromstring(content)
    except Exception as e:
        log.critical('Problem parsing XML with lxml: {}'.format(e))
        return None

    matches = defaultdict(list)
    # parse show names in the same order as returned by tvrage, first one is usually the good one
    for xml_show in XPATH_SHOW(tree):
        for name in extract_names(xml_show):
            ratio = int(difflib.SequenceMatcher(None, show['clean_name'], clean_name(name)).ratio() * 100)
            if ratio == 100:
                return xmltodict.parse(etree.tostring(xml_show))['show']
            matches[ratio].append(xml_show)
                
    # if no 100% is found, check highest ratio matches
    for ratio, xml_matches in sorted(matches.items(), reverse=True):
        for xml_match in xml_matches:
            if ratio >= 80:
                return xmltodict.parse(etree.tostring(xml_match))['show']
            elif 80 > ratio > 60:
                if 'country' in show and show['country'] and XPATH_COUNTRY(xml_match):
                    if show['country'].lower() == XPATH_COUNTRY(xml_match)[0].lower():
                        return xmltodict.parse(etree.tostring(xml_match))['show']
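XPATH_SHOW, XPATH_COUNTRY and extract_names are helpers defined elsewhere in the module. A hedged sketch of what they plausibly look like, assuming the TVRage search XML nests one <show> element per candidate (the exact XPath expressions and name fields are assumptions):

from lxml import etree

XPATH_SHOW = etree.XPath('//show')               # assumed: one element per candidate show
XPATH_COUNTRY = etree.XPath('./country/text()')  # assumed: country relative to a <show>

def extract_names(xml_show):
    # assumed: yield the primary name plus any aliases listed for the show
    for name in xml_show.xpath('./name/text() | ./akas/aka/text()'):
        yield name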
Example #6
def process_movies():
    try:
        return pynab.ids.process(
            'movie',
            interfaces=config.postprocess.get('process_movies'),
            limit=500)
    except Exception:
        log.critical(traceback.format_exc())
        raise
Example #7
File: api.py Project: tbetton/pynab
def daemonize(pidfile):
    try:
        import traceback
        from daemonize import Daemonize
        daemon = Daemonize(app='pynab', pid=pidfile, action=main)
        daemon.start()
    except SystemExit:
        raise
    except:
        log.critical(traceback.format_exc())
Example #8
File: nzbs.py Project: sqw23/pynab
def get_nzb_details(nzb):
    """Returns a JSON-like Python dict of NZB contents, including extra information
    such as a list of any nfos/rars that the NZB references."""

    try:
        # using the html parser here instead of the straight lxml might be slower
        # but some of the nzbs spewed forth by newznab are broken and contain
        # non-xml entities, ie. ²
        # this breaks the normal lxml parser
        tree = html.fromstring(gzip.decompress(nzb.data))
    except Exception as e:
        log.critical('nzbs: problem parsing XML with lxml: {}'.format(e))
        return None

    nfos = []
    sfvs = []
    rars = []
    pars = []
    zips = []

    rar_count = 0
    par_count = 0

    for file_subject in XPATH_FILE(tree):
        if rar_part_regex.search(file_subject):
            rar_count += 1
        if nfo_regex.search(file_subject) and not metadata_regex.search(file_subject):
            nfos.append(filexml_to_dict(file_subject.getparent()))
        if sfv_regex.search(file_subject):
            sfvs.append(filexml_to_dict(file_subject.getparent()))
        if rar_regex.search(file_subject) and not metadata_regex.search(file_subject):
            rars.append(filexml_to_dict(file_subject.getparent()))
        if par2_regex.search(file_subject):
            par_count += 1
            if not par_vol_regex.search(file_subject):
                pars.append(filexml_to_dict(file_subject.getparent()))
        if zip_regex.search(file_subject) and not metadata_regex.search(file_subject):
            zips.append(filexml_to_dict(file_subject.getparent()))

    return {
        'nfos': nfos,
        'sfvs': sfvs,
        'rars': rars,
        'pars': pars,
        'zips': zips,
        'rar_count': rar_count,
        'par_count': par_count,
    }
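The regex constants (rar_part_regex, nfo_regex, metadata_regex and friends) and filexml_to_dict are defined elsewhere in nzbs.py and are not shown. The patterns below are illustrative assumptions only, not the project's actual definitions, but they show the kind of subject-line matching the loop relies on:

import re

# assumptions: rough subject-line patterns, not pynab's real ones
rar_part_regex = re.compile(r'\.(r\d{2,3}|part\d+\.rar)', re.I)  # split-archive volumes
rar_regex = re.compile(r'\.rar\b', re.I)                         # any rar reference
nfo_regex = re.compile(r'\.nfo\b', re.I)
sfv_regex = re.compile(r'\.sfv\b', re.I)
par2_regex = re.compile(r'\.par2\b', re.I)
par_vol_regex = re.compile(r'\.vol\d+\+\d+\.par2', re.I)         # par2 recovery volumes
zip_regex = re.compile(r'\.zip\b', re.I)
metadata_regex = re.compile(r'\.(nzb|nfo)\.', re.I)              # nested metadata references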
Example #9
def daemonize(pidfile):
    try:
        import traceback
        from daemonize import Daemonize

        fds = []
        if log_descriptor:
            fds = [log_descriptor]

        daemon = Daemonize(app='pynab', pid=pidfile, action=main, keep_fds=fds)
        daemon.start()
    except SystemExit:
        raise
    except:
        log.critical(traceback.format_exc())
Example #10
def get_nzb_details(nzb):
    """Returns a JSON-like Python dict of NZB contents, including extra information
    such as a list of any nfos/rars that the NZB references."""

    try:
        # using the html parser here instead of the straight lxml might be slower
        # but some of the nzbs spewed forth by newznab are broken and contain
        # non-xml entities, ie. ²
        # this breaks the normal lxml parser
        tree = html.fromstring(gzip.decompress(nzb.data))
    except Exception as e:
        log.critical('nzbs: problem parsing XML with lxml: {}'.format(e))
        return None

    nfos = []
    sfvs = []
    rars = []
    pars = []
    zips = []

    rar_count = 0
    par_count = 0

    for file_subject in XPATH_FILE(tree):
        if rar_part_regex.search(file_subject):
            rar_count += 1
        if nfo_regex.search(file_subject) and not metadata_regex.search(file_subject):
            nfos.append(filexml_to_dict(file_subject.getparent()))
        if sfv_regex.search(file_subject):
            sfvs.append(filexml_to_dict(file_subject.getparent()))
        if rar_regex.search(file_subject) and not metadata_regex.search(file_subject):
            rars.append(filexml_to_dict(file_subject.getparent()))
        if par2_regex.search(file_subject):
            par_count += 1
            if not par_vol_regex.search(file_subject):
                pars.append(filexml_to_dict(file_subject.getparent()))
        if zip_regex.search(file_subject) and not metadata_regex.search(file_subject):
            zips.append(filexml_to_dict(file_subject.getparent()))

    return {
        'nfos': nfos,
        'sfvs': sfvs,
        'rars': rars,
        'pars': pars,
        'zips': zips,
        'rar_count': rar_count,
        'par_count': par_count,
    }
Example #11
def get_size(nzb):
    """Returns the size of a release (in bytes) as given by the NZB, compressed."""
    try:
        # using the html parser here instead of the straight lxml might be slower
        # but some of the nzbs spewed forth by newznab are broken and contain
        # non-xml entities, ie. ²
        # this breaks the normal lxml parser
        tree = html.fromstring(gzip.decompress(nzb.data))
    except Exception as e:
        log.critical('nzbs: problem parsing XML with lxml: {}'.format(e))
        return None

    size = 0
    for byte_count in XPATH_BYTES(tree):
        try:
            size += int(byte_count)
        except (ValueError, TypeError):
            # skip malformed byte counts rather than failing the whole NZB
            pass

    return size
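XPATH_BYTES is a precompiled lxml XPath defined elsewhere in nzbs.py; a plausible definition is sketched below (the exact expression is an assumption; note the html parser lowercases tag names):

from lxml import etree

# assumption: every <segment> element in the NZB carries a bytes attribute
XPATH_BYTES = etree.XPath('//segment/@bytes')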
Example #12
File: nzbs.py Project: sqw23/pynab
def get_size(nzb):
    """Returns the size of a release (in bytes) as given by the NZB, compressed."""
    try:
        # using the html parser here instead of the straight lxml might be slower
        # but some of the nzbs spewed forth by newznab are broken and contain
        # non-xml entities, ie. ²
        # this breaks the normal lxml parser
        tree = html.fromstring(gzip.decompress(nzb.data))
    except Exception as e:
        log.critical('nzbs: problem parsing XML with lxml: {}'.format(e))
        return None

    size = 0
    for byte_count in XPATH_BYTES(tree):
        try:
            size += int(byte_count)
        except (ValueError, TypeError):
            # skip malformed byte counts rather than failing the whole NZB
            pass

    return size
Example #13
def process_requests():
    try:
        return pynab.requests.process(500)
    except Exception:
        log.critical(traceback.format_exc())
        raise
Example #14
def process_movies():
    try:
        return pynab.ids.process("movie", interfaces=config.postprocess.get("process_movies"), limit=500)
    except Exception:
        log.critical(traceback.format_exc())
        raise
Example #15
def process_requests():
    try:
        return pynab.requests.process(500)
    except Exception:
        log.critical(traceback.format_exc())
        raise
Example #16
def process_imdb():
    try:
        return pynab.imdb.process(500)
    except Exception:
        log.critical(traceback.format_exc())
        raise
Example #17
def save_all(parts):
    """Save a set of parts to the DB, in a batch if possible."""
    if parts:
        start = time.time()
        group_name = list(parts.values())[0]['group_name']

        with db_session() as db:
            # this is a little tricky. parts have no uniqueness at all.
            # no uniqid and the posted dates can change since it's based off the first
            # segment that we see in that part, which is different for each scan.
            # what we do is get the next-closest thing (subject+author+group) and
            # order it by oldest first, so when it's building the dict the newest parts
            # end on top (which are the most likely to be being saved to).

            # realistically, it shouldn't be a big problem - parts aren't stored in the db
            # for very long anyway, and they're only a problem while there. saving 500 million
            # segments to the db is probably not a great idea anyway.
            existing_parts = dict(
                ((part.hash, part) for part in
                 db.query(Part.id, Part.hash).filter(Part.hash.in_(parts.keys())).filter(
                     Part.group_name == group_name).order_by(Part.posted.asc()).all()
                )
            )

            part_inserts = []
            for hash, part in parts.items():
                existing_part = existing_parts.get(hash, None)
                if not existing_part:
                    segments = part.pop('segments')
                    part_inserts.append(part)
                    part['segments'] = segments

            if part_inserts:
                ordering = ['hash', 'subject', 'group_name', 'posted', 'posted_by', 'total_segments', 'xref']

                s = io.StringIO()
                for part in part_inserts:
                    for item in ordering:
                        if item == 'posted':
                            s.write('"' + part[item].replace(tzinfo=None).strftime('%Y-%m-%d %H:%M:%S').replace('"',
                                                                                                                '\\"') + '",')
                        elif item == 'xref':
                            # leave off the comma
                            s.write('"' + part[item].encode('utf-8', 'replace').decode('utf-8').replace('"', '\\"') + '"')
                        else:
                            s.write('"' + str(part[item]).encode('utf-8', 'replace').decode().replace('"', '\\"') + '",')
                    s.write("\n")
                s.seek(0)

                if not copy_file(engine, s, ordering, Part):
                    return False

                s.close()
                db.close()

        with db_session() as db:
            existing_parts = dict(
                ((part.hash, part) for part in
                 db.query(Part)
                .options(
                     subqueryload('segments'),
                     Load(Part).load_only(Part.id, Part.hash),
                     Load(Segment).load_only(Segment.id, Segment.segment)
                 )
                .filter(Part.hash.in_(parts.keys()))
                .filter(Part.group_name == group_name)
                .order_by(Part.posted.asc())
                .all()
                )
            )

            segment_inserts = []
            for hash, part in parts.items():
                existing_part = existing_parts.get(hash, None)
                if existing_part:
                    segments = dict(((s.segment, s) for s in existing_part.segments))
                    for segment_number, segment in part['segments'].items():
                        if int(segment_number) not in segments:
                            segment['part_id'] = existing_part.id
                            segment_inserts.append(segment)
                        else:
                            # we hit a duplicate message for a part
                            # kinda wish people would stop reposting shit constantly
                            pass
                else:
                    log.critical(
                        'parts: part didn\'t exist when we went to save it. backfilling with dead_binary_age not set to 0?')
                    return False

            if segment_inserts:
                ordering = ['segment', 'size', 'message_id', 'part_id']

                s = io.StringIO()
                for segment in segment_inserts:
                    for item in ordering:
                        if item == 'part_id':
                            # leave off the comma
                            s.write('"' + str(segment[item]).replace('"', '\\"') + '"')
                        else:
                            s.write('"' + str(segment[item]).encode('utf-8', 'replace').decode('utf-8').replace('"', '\\"') + '",')
                    s.write("\n")
                s.seek(0)

                if not copy_file(engine, s, ordering, Segment):
                    return False

                s.close()
                db.close()

        end = time.time()

        log.debug('parts: saved {} parts and {} segments in {:.2f}s'.format(
            len(part_inserts),
            len(segment_inserts),
            end - start
        ))

        del part_inserts[:]
        del segment_inserts[:]

    return True
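copy_file is pynab's bulk-load helper and is not shown here; from its call sites it presumably streams the CSV-style buffer built above into the corresponding table. A hypothetical sketch of such a helper, assuming PostgreSQL, psycopg2 and a SQLAlchemy engine (this is not pynab's actual implementation):

def copy_file(engine, fileobj, ordering, model):
    # hypothetical sketch only -- not pynab's actual helper, which handles more cases
    columns = ', '.join(ordering)
    conn = engine.raw_connection()
    try:
        cur = conn.cursor()
        # stream the quoted, backslash-escaped CSV rows straight into the table
        sql = "COPY {} ({}) FROM STDIN WITH CSV QUOTE '\"' ESCAPE '\\'".format(
            model.__table__.name, columns)
        cur.copy_expert(sql, fileobj)
        conn.commit()
        return True
    except Exception:
        conn.rollback()
        return False
    finally:
        conn.close()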
Example #18
def save_all(parts):
    """Save a set of parts to the DB, in a batch if possible."""
    if parts:
        start = time.time()
        group_name = list(parts.values())[0]['group_name']

        with db_session() as db:
            # this is a little tricky. parts have no uniqueness at all.
            # no uniqid and the posted dates can change since it's based off the first
            # segment that we see in that part, which is different for each scan.
            # what we do is get the next-closest thing (subject+author+group) and
            # order it by oldest first, so when it's building the dict the newest parts
            # end on top (which are the most likely to be being saved to).

            # realistically, it shouldn't be a big problem - parts aren't stored in the db
            # for very long anyway, and they're only a problem while there. saving 500 million
            # segments to the db is probably not a great idea anyway.
            existing_parts = dict(
                ((part.hash, part) for part in
                 db.query(Part.id, Part.hash).filter(Part.hash.in_(parts.keys())).filter(
                     Part.group_name == group_name).order_by(Part.posted.asc()).all()
                )
            )

            part_inserts = []
            for hash, part in parts.items():
                existing_part = existing_parts.get(hash, None)
                if not existing_part:
                    segments = part.pop('segments')
                    part_inserts.append(part)
                    part['segments'] = segments

            if part_inserts:
                ordering = [
                    'hash', 'subject', 'group_name', 'posted', 'posted_by',
                    'total_segments', 'xref'
                ]

                s = io.StringIO()
                for part in part_inserts:
                    for item in ordering:
                        if item == 'posted':
                            posted = part[item].replace(tzinfo=None).strftime('%Y-%m-%d %H:%M:%S')
                            s.write('"' + posted.replace('"', '\\"') + '",')
                        elif item == 'xref':
                            # leave off the comma
                            s.write('"' + part[item].encode('utf-8', 'replace').decode('utf-8').replace('"', '\\"') + '"')
                        else:
                            s.write('"' + str(part[item]).encode('utf-8', 'replace').decode().replace('"', '\\"') + '",')
                    s.write("\n")
                s.seek(0)

                if not copy_file(engine, s, ordering, Part):
                    return False

                s.close()
                db.close()

        with db_session() as db:
            existing_parts = dict(
                ((part.hash, part) for part in
                 db.query(Part)
                 .options(
                     subqueryload('segments'),
                     Load(Part).load_only(Part.id, Part.hash),
                     Load(Segment).load_only(Segment.id, Segment.segment))
                 .filter(Part.hash.in_(parts.keys()))
                 .filter(Part.group_name == group_name)
                 .order_by(Part.posted.asc())
                 .all()))

            segment_inserts = []
            for hash, part in parts.items():
                existing_part = existing_parts.get(hash, None)
                if existing_part:
                    segments = dict(
                        ((s.segment, s) for s in existing_part.segments))
                    for segment_number, segment in part['segments'].items():
                        if int(segment_number) not in segments:
                            segment['part_id'] = existing_part.id
                            segment_inserts.append(segment)
                        else:
                            # we hit a duplicate message for a part
                            # kinda wish people would stop reposting shit constantly
                            pass
                else:
                    log.critical(
                        'parts: part didn\'t exist when we went to save it. backfilling with dead_binary_age not set to 0?'
                    )
                    return False

            if segment_inserts:
                ordering = ['segment', 'size', 'message_id', 'part_id']

                s = io.StringIO()
                for segment in segment_inserts:
                    for item in ordering:
                        if item == 'part_id':
                            # leave off the comma
                            s.write('"' + str(segment[item]).replace('"', '\\"') + '"')
                        else:
                            s.write('"' + str(segment[item]).encode('utf-8', 'replace').decode('utf-8').replace('"', '\\"') + '",')
                    s.write("\n")
                s.seek(0)

                if not copy_file(engine, s, ordering, Segment):
                    return False

                s.close()
                db.close()

        end = time.time()

        log.debug('parts: saved {} parts and {} segments in {:.2f}s'.format(
            len(part_inserts), len(segment_inserts), end - start))

        del part_inserts[:]
        del segment_inserts[:]

    return True