Example #1
0
    def get_video(self) -> None:
        """Download the video and write the output to its folder.

        Creates the target folder, stores it under the ``path`` key of the
        video's database document and downloads the video. On success the
        video is flagged as processed, its ``file_path`` is set and it is
        saved. ``DownloadError`` and ``SameFileError`` are logged instead of
        being raised; in that case nothing is saved.
        """
        self.create_path()

        output_folder = self.get_path()['path']
        COLLECTION.update_one({'_id': self.video_id},
                              {'$set': {
                                  'path': output_folder
                              }})

        ydl = YoutubeDl(output_folder=output_folder, mode=self.mode).ydl

        # Flag the video as processing whilst it is being downloaded so that
        # other processes don't attempt to download it at the same time.
        self.vid.processing = True
        try:
            with ydl:
                info_dict = ydl.extract_info(self.video_id)
                ydl.process_info(info_dict)
        except (DownloadError, SameFileError) as e:
            logger.error(e)
            logger.info('failed to download video %s', self.video_id)
            return
        finally:
            # Always clear the flag, otherwise a failed download would leave
            # the video marked as being processed.
            self.vid.processing = False

        self.vid.processed = True
        self.vid.file_path = output_folder
        self.vid.save_video()
    def write_nfo(self) -> None:
        """Create an nfo file from the template and write it to the path.

        Reads the video's metadata from the database, fills ``template.nfo``
        (looked up in ``APP_PATH``) with it and writes the result to
        ``tvshow.nfo`` inside the video's folder. When the database document
        or the template file is missing the problem is logged and nothing is
        written.
        """

        doc = COLLECTION.find_one({'_id': self.video_id}, {
            '_id': True,
            'uploader': True,
            'title': True,
            'description': True,
            'upload_date': True
        })
        if doc is None:
            logger.error('no database entry found for video %s',
                         self.video_id)
            return

        try:
            with open(os.path.join(APP_PATH, 'template.nfo'), 'r') as fl:
                template = Template(fl.read())
        except FileNotFoundError as e:
            # Without a template there is nothing to render; carrying on
            # would only fail on the unbound ``template`` name.
            logger.error(e)
            return

        out_template = template.substitute(unique_id=doc['_id'],
                                           studio=doc['uploader'],
                                           title=doc['title'],
                                           plot=doc['description'],
                                           date_prem=doc['upload_date'])
        path = os.path.join(self.get_path()['path'], 'tvshow.nfo')
        logger.info('writing nfo to path %s', path)
        with open(path, 'w') as fl:
            fl.write(out_template)
Example #3
0
    def save_video(self) -> Union[None, str]:
        """Write the attributes of this Video instance to the database.

        Every instance attribute becomes a key of the stored document with
        its leading underscores removed, except ``_video_id`` which is
        stored as ``_id`` and ``_processed`` which is stored as
        ``Processed``.

        Return:
            None: the document was passed to the database
            str: test mode message, nothing was saved
        """
        if self._mode == 'test':
            return 'Data cannot be saved in test mode'

        # attributes whose document key is not simply the unprefixed name
        special_keys = {'_video_id': '_id', '_processed': 'Processed'}
        document = {
            special_keys.get(attr, attr.lstrip('_')): value
            for attr, value in self.__dict__.items()
        }

        COLLECTION.replace_one({'_id': self._video_id}, document)
        logger.info('%s updated', self._video_id)
        return None
Example #4
0
def main():
    """Push every item of ``data_in`` to ``analysis_objects0``.

    Items are taken from the front of ``data_in`` one at a time, so the
    list is empty once this has finished. The number of pushed items is
    logged together with the number of items found at the start.
    """
    total = len(data_in)
    imported = 0
    while data_in:
        push_object(data_in.pop(0), 'analysis_objects0')
        imported += 1
    logger.info('Imported %d out of %d', imported, total)
 def get_thumbnail(self) -> None:
     """Download the video's thumbnail and save it as ``thumbnail.jpg``.

     The thumbnail url is read from the video's database document, the
     image is fetched over HTTP and written as JPEG into the folder given
     by ``get_path``.
     """
     # TODO fix path to use same as video_downloader
     url = COLLECTION.find_one({'_id': self.video_id},
                               {'thumbnail': True})['thumbnail']
     # A timeout keeps an unresponsive server from blocking this call
     # forever. The body is read in full straight away, so the response
     # is not streamed.
     response = requests.get(url, timeout=30)
     image_data = Image.open(io.BytesIO(response.content))
     path = self.get_path()['path']
     logger.info('writing thumbnail to path %s', path)
     image_data.save(os.path.join(path, 'thumbnail.jpg'), 'jpeg')
Example #6
0
    def create_path(self) -> None:
        """Create the folder for the video unless it already exists.

        ``get_path`` supplies a mapping whose ``exists`` entry tells whether
        the folder is already present and whose ``path`` entry is the folder
        to create.
        """

        check = self.get_path()
        exists: bool = check['exists']
        path: str = check['path']

        if exists:
            logger.info('Existing path %s found', path)
            return

        # exist_ok covers the folder being created by something else
        # between the get_path check and this call.
        os.makedirs(path, exist_ok=True)
        logger.info('Creating path %s', path)
Example #7
0
def get_video_info(video_id: str, tags: List = None) -> Union[None, dict]:
    """Fetch the metadata of a video.

    Args:
        video_id (str): The id of a youtube video
        tags (list): Tags to attach to the result, an empty list if omitted

    Returns:
        None: the extractor raised a DownloadError
        dict: example

        {'_id': 'video_id',
        'Processed': False,
        'title': 'video title',
        'uploader': 'video creator',
        'upload_date': datetime parsed from the '%Y%m%d' upload date,
        'description': 'video description',
        'thumbnail': 'video thumbnail (url)',
        'tags': [tags]}

    Raises:
        TypeError: invalid video_id type

    """
    if not isinstance(video_id, str):
        raise TypeError(f'{video_id} should be str not {type(video_id)}')

    try:
        video_info = INFO_EXTRACTOR.extract_info(video_id)
    except youtube_dl.utils.DownloadError as e:
        logger.error(e)
        logger.info('%s is not a valid id', video_id)
        return None

    # The document is assembled key by key in its final order
    info = {'_id': video_info['id'], 'Processed': False}
    for key in ('title', 'uploader'):
        info[key] = video_info[key]
    info['upload_date'] = datetime.strptime(video_info['upload_date'],
                                            '%Y%m%d')
    for key in ('description', 'thumbnail'):
        info[key] = video_info[key]
    info['tags'] = [] if tags is None else tags

    return info
Example #8
0
def create_analysis_object(qo):
    """
    Build an AnalysisObject for every QueryObject and push it to
    ``analysis_objects0``.

    The info of each AnalysisObject is fetched with ``get_info`` from the
    page given by the QueryObject's url_query attribute. An AttributeError
    raised while processing is logged and ends the processing of the
    remaining QueryObjects.

    :param qo: A list of QueryObjects
    :type qo: list[QueryObject]
    """
    try:
        for query in qo:
            title = query.title
            logger.info('pid: %d - processing: %s', os.getpid(), title)
            page_info = get_info(query.url_query)
            analysis = AnalysisObject(title=title, info=page_info)
            push_object(analysis, 'analysis_objects0')
    except AttributeError as err:
        logger.error(err)
Example #9
0
    def __init__(self, mode: str, video_id: str, processed: bool, title: str,
                 uploader: str, upload_date: str, description: str,
                 thumbnail: str, tags: List[str], file_path: str):
        """Store the given video metadata on the new instance.

        Every argument is kept on a private attribute of the same name
        prefixed with an underscore. ``_processing`` is not taken from the
        arguments and always starts as ``False``.

        Args:
            mode (str): stored as ``_mode``
            video_id (str): stored as ``_video_id``
            processed (bool): stored as ``_processed``
            title (str): stored as ``_title``
            uploader (str): stored as ``_uploader``
            upload_date (str): stored as ``_upload_date``
            description (str): stored as ``_description``
            thumbnail (str): stored as ``_thumbnail``
            tags (list): stored as ``_tags``
            file_path (str): stored as ``_file_path``
        """

        # NOTE: logged before any attribute has been assigned
        logger.info('Video object initialised')

        # No validation is performed here, the values are stored as given
        self._mode = mode
        self._video_id: str = video_id
        self._processed: bool = processed
        self._title: str = title
        self._uploader: str = uploader
        self._upload_date: str = upload_date
        self._description: str = description
        self._thumbnail: str = thumbnail
        self._tags: Optional[List[str]] = tags
        self._file_path: str = file_path
        # a new instance is never in the processing state
        self._processing: bool = False
Example #10
0
    def delete_video(self, check: bool = False) -> Union[Tuple, str]:
        """Remove the current object from the database
        along with any files or folders

        Args:
            check (bool): a value to be explicitly set
            in order for the video to be deleted

        Returns:
           tuple: (int: 0, str: '') - the document was not found
           tuple: (int: 1, str: '') - the document was found and deleted
            but had no path key
           tuple: (int: 2, str: path) - the document was found and deleted
            and the removal of its folder was attempted
           str: test mode message, or an empty string if check was not set
        """

        if self._mode == 'test':
            return 'Data cannot be deleted in test mode'

        if not check:
            return ''

        result = COLLECTION.find_one({'_id': self.video_id}, {
            '_id': True,
            'path': True
        })

        if not result:
            return (0, '')

        COLLECTION.delete_one({'_id': self.video_id})

        if 'path' not in result:
            logger.info('video_id %s deleted - no folder found', self.video_id)
            return (1, '')

        path: str = result['path']
        # NOTE(review): os.path.split(path)[0] is the parent of ``path``
        # unless ``path`` ends with a separator - confirm this is the folder
        # that is meant to be removed.
        # ignore_errors=True already covers a missing folder, so no
        # exception handling is needed around this call.
        shutil.rmtree(os.path.split(path)[0], ignore_errors=True)
        logger.info('video_id %s deleted - folder %s deleted',
                    self.video_id, path)
        return (2, path)
Example #11
0
def add_queue(video_id: str, tags: list = None) -> bool:
    """If the video_id does not exist, insert
    an entry into the database from the information
    provided by get_video_info

    Args:
        video_id (str): The id of a youtube video
        tags (list): A list of tags, ['undefined'] if omitted

    Returns:
        True: item was added
        False: the video_id already exists in the database or
            get_video_info returned no information

    Raises:
        TypeError: invalid video_id type
    """
    if tags is None:
        tags = ['undefined']
    if not isinstance(video_id, str):
        raise TypeError(f'{video_id} should be str not {type(video_id)}')

    if check_db(video_id) is True:
        logger.info('video_id %s already exists in database', video_id)
        return False

    vid_info = get_video_info(video_id, tags)
    if vid_info is None:
        return False

    # Insert the information that was just fetched; fetching it a second
    # time costs another lookup and may return None.
    COLLECTION.insert_one(vid_info)
    logger.info('%s successfully inserted', video_id)
    return True
Example #12
0
def get_info(url_query):
    """
    Perform a GET request on the url passed into the function, parse the
    content of the ``mw-parser-output`` div, sanitise it of any unwanted
    data and return the remaining text

    :param url_query: page to be queried
    :type url_query: str
    :returns: the filtered, lower-cased text of the page without newlines
        and quotes, or an empty string if the request failed, the status
        was not 200 or the page has no ``mw-parser-output`` div
    :rtype: str
    """
    try:
        req = requests.get(url_query, timeout=30)
    except (ConnectionError, requests.exceptions.ConnectionError,
            requests.exceptions.ReadTimeout) as e:
        # requests' ConnectionError is named explicitly because it is not
        # a subclass of the builtin ConnectionError.
        logger.error(e)
        req = None

    if req is None or req.status_code != 200:
        logger.info('%s returned no information or invalid status', url_query)
        return ''

    soup = BeautifulSoup(req.text, features='html.parser')
    content = soup.find('div', attrs={'class': 'mw-parser-output'})
    if content is None:
        # find() returns None when the page has no such div
        logger.info('%s returned no information or invalid status', url_query)
        return ''

    remove_tags = [
        'pre', 'script', 'nav', 'footer', 'form', 'input', 'meta'
    ]
    for tag_name in remove_tags:  # remove this stuff from the content
        for element in content.find_all(tag_name):
            element.decompose()

    return content.text.replace('\n',
                                '').replace('"', '').replace("'", '').lower()
Example #13
0
 def processed(self, p: bool) -> None:
     """Set the processed flag.

     Raises:
         ValueError: p is not a bool
     """
     if isinstance(p, bool):
         self._processed = p
         logger.info('processed attribute set to %s', str(p))
         return
     raise ValueError('processed must be bool')
Example #14
0
 def file_path(self, path: str) -> None:
     """Set the file path.

     Raises:
         ValueError: path is not a str
     """
     if isinstance(path, str):
         self._file_path = path
         logger.info('file_path attribute set to %s', path)
         return
     raise ValueError('Path must be str')
Example #15
0
# pylint: disable=all
import os
import pickle

from app.project_logging import logger
from app.database import push_object, flush_redis


"""
Import test data into redis
"""

# The pickled test data is expected next to this module
file = os.path.join(os.path.dirname(__file__), 'analysis_objects0.pickle')
logger.info('Reading from %s', file)

# NOTE: unpickling can execute arbitrary code, so this file must be trusted
with open(file, 'rb') as fl:
    data_in = pickle.load(fl)


def main():
    """Push every item of ``data_in`` to ``analysis_objects0``.

    Items are taken from the front of ``data_in`` one at a time, so the
    list is empty once this has finished. The number of pushed items is
    logged together with the number of items found at the start.
    """
    total = len(data_in)
    imported = 0
    while data_in:
        push_object(data_in.pop(0), 'analysis_objects0')
        imported += 1
    logger.info('Imported %d out of %d', imported, total)


if __name__ == '__main__':
    flush_redis()