コード例 #1
0
ファイル: qlabel.py プロジェクト: colinsongf/crf_label
def do_train(args):
    """Interactively label documents, retraining the CRF model at intervals.

    Every document served by the data store is shown in an edit shell
    together with its tags: the stored ones when the rows carry a tag
    column, otherwise the tags inferred by the model. The action returned
    by the shell then decides whether to step back, jump to another
    document, or save the corrected tags. The loop ends when the store is
    exhausted or a ``QuitException`` is raised.

    :param args: parsed arguments; ``args.config`` is handed to
        ``ConfigParser.read_file`` (presumably an open file -- confirm
        against the argument parser).
    """
    # Load configuration
    config = ConfigParser()
    config.read_file(args.config)

    data = DataStore(config)

    # Create the CRF model.
    model = CRF(config)

    # May be None (option absent) or 0; both are treated below as
    # "never retrain" instead of crashing the session on the first save.
    retrain_epochs = config["training"].getint("retrain_every")

    # One score per saved document; fed to the progress display.
    accuracy = []

    with EditShell(config) as shell:
        while data.has_next():
            conll = data.next()
            i = data.i()

            # if the data doesn't have tags, try to smart-tag them.
            if len(conll[0]) == DataStore.TAG_LABEL + 1:
                tags = [tok[DataStore.TAG_LABEL] for tok in conll]
            else:
                tags = model.infer(conll)

            try:
                conll_display = ["{}".format(token[0]) for token in conll]

                # Pass a copy of the tags: the originals are scored against
                # the saved result further down.
                action = shell.run(conll_display, list(tags),
                                   metadata=render_progress(data, accuracy))

                if action.type == ":prev":
                    try:
                        data.rewind(2)  # move 2 indices back
                    except AttributeError:
                        # NOTE(review): falls back to a single step when the
                        # two-step rewind raises AttributeError -- confirm
                        # when DataStore.rewind does that.
                        data.rewind(1)
                elif action.type == ":goto":
                    doc_idx, = action.args
                    # NOTE: this check disappears when Python runs with -O.
                    assert doc_idx >= 0
                    data.goto(doc_idx)
                elif action.type == "save":
                    _, tags_ = action.args
                    accuracy.append(score(tags, tags_))

                    data.update(conll, tags_)

                    # A falsy interval (None or 0) disables retraining; the
                    # bare modulo would raise TypeError/ZeroDivisionError.
                    if retrain_epochs and i % retrain_epochs == 0:
                        model.retrain()

            except QuitException:
                break
コード例 #2
0
class YTPlaylist:
    """Fetches the videos of one YouTube playlist into a ``DataStore``."""

    def __init__(self, yt_resource: Resource, playlist_id) -> None:
        """
        :param yt_resource: googleapiclient.discovery.Resource object
        :param playlist_id: youtube playlist ID
        """
        self._yt_resource = yt_resource
        self._playlist_id = playlist_id
        self._videos_store = DataStore()

    @staticmethod
    def _progress_fraction(page, page_info):
        """Return ``page`` divided by the number of result pages.

        :param page: 1-based number of the page that was just fetched
        :param page_info: the response's ``pageInfo`` mapping, read for
            ``totalResults`` and ``resultsPerPage``
        :return: fraction to report to the progress bar
        """
        per_page = page_info['resultsPerPage']
        total_pages = ceil(page_info['totalResults'] / per_page) if per_page else 0
        # An empty listing yields zero pages; count it as a single page so
        # the division cannot fail and the bar ends up complete.
        return page / max(total_pages, 1)

    def fetch_videos(self):
        """Request every page of playlist items and store each video title.

        A manual progress bar is advanced after each page. Paging stops once
        a response carries no ``nextPageToken``.
        """
        with alive_bar(manual=True, bar='smooth',
                       spinner='dots_reverse') as bar:

            next_page_token = None
            page = 1
            while True:
                response = self._yt_resource.playlistItems().list(
                    part='snippet',
                    playlistId=self._playlist_id,
                    pageToken=next_page_token).execute()

                for item in response['items']:
                    # Videos are stored under the placeholder id '#'.
                    self._videos_store.update(title=item['snippet']['title'],
                                              id='#')

                bar(self._progress_fraction(page, response['pageInfo']))
                page += 1

                next_page_token = response.get('nextPageToken')
                if not next_page_token:
                    break
        print()

    def print_videos(self):
        """Print a heading followed by the stored videos."""
        print_heading('<< VIDEOS IN THE PLAYLIST >>')
        self._videos_store.print()

    def get_videos_serial(self):
        """Map each stored video title to its 1-based position.

        When a title occurs more than once, the last position wins.

        :return: ``{title: serial_number}``
        """
        return {
            cache_unit['title']: serial
            for serial, cache_unit in enumerate(self._videos_store.list(),
                                                start=1)
        }
コード例 #3
0
class YTChannel:
    """Fetches the playlists of one YouTube channel into a ``DataStore``."""

    def __init__(self, yt_resource: Resource, channel_id) -> None:
        """
        :param yt_resource: googleapiclient.discovery.Resource object
        :param channel_id: youtube channel ID
        """
        self._yt_resource = yt_resource
        self._channel_id = channel_id
        self._playlist_store = DataStore()

    @property
    def total_playlists(self):
        """Number of playlists held in the store (its ``len`` attribute)."""
        return self._playlist_store.len

    @staticmethod
    def _progress_fraction(page, page_info):
        """Return ``page`` divided by the number of result pages.

        :param page: 1-based number of the page that was just fetched
        :param page_info: the response's ``pageInfo`` mapping, read for
            ``totalResults`` and ``resultsPerPage``
        :return: fraction to report to the progress bar
        """
        per_page = page_info['resultsPerPage']
        total_pages = ceil(page_info['totalResults'] / per_page) if per_page else 0
        # A channel without playlists yields zero pages; count it as a
        # single page so the division cannot fail and the bar completes.
        return page / max(total_pages, 1)

    def fetch_playlists(self):
        """Request every page of the channel's playlists and store them.

        Each playlist is stored by title and id. A manual progress bar is
        advanced after each page. Paging stops once a response carries no
        ``nextPageToken``.
        """
        with alive_bar(manual=True, bar='smooth',
                       spinner='dots_reverse') as bar:

            next_page_token = None
            page = 1
            while True:
                response = self._yt_resource.playlists().list(
                    part='snippet',
                    channelId=self._channel_id,
                    pageToken=next_page_token).execute()

                for item in response['items']:
                    self._playlist_store.update(title=item['snippet']['title'],
                                                id=item['id'])

                bar(self._progress_fraction(page, response['pageInfo']))
                page += 1

                next_page_token = response.get('nextPageToken')
                if not next_page_token:
                    break
        print()

    def print_playlists(self):
        """Print a heading followed by the stored playlists."""
        print_heading('<< PLAYLISTS >>')
        self._playlist_store.print()

    def select_playlist(self, playlist_no):
        """Return the stored playlist at 1-based position ``playlist_no``.

        NOTE(review): the number is not range-checked here; the value is
        used as ``list()[playlist_no - 1]`` -- confirm callers validate it.
        """
        return self._playlist_store.list()[playlist_no - 1]
コード例 #4
0
class Youtube:
    """Searches channels and reports their subscriber counts."""

    def __init__(self, yt_resource: Resource) -> None:
        """
        :param yt_resource: googleapiclient.discovery.build object
        """
        self._yt_resource = yt_resource
        self._channel_store = DataStore()
        # Filled by _fetch_subscribers_of_all, one entry per stored channel.
        self._subscribers = []

    @property
    def total_channels(self):
        """Number of channels held in the store (its ``len`` attribute)."""
        return self._channel_store.len

    def search_channel(self, query: str) -> None:
        """Search for channels matching ``query`` and store the results.

        At most 9 results are requested. Each hit is stored by title and
        channel id; nothing is returned.
        """
        yt_request = self._yt_resource.search().list(part='snippet',
                                                     type='channel',
                                                     maxResults=9,
                                                     q=query)
        yt_response = yt_request.execute()

        for item in yt_response['items']:
            self._channel_store.update(title=item['snippet']['title'],
                                       id=item['snippet']['channelId'])

    def print_channels(self):
        """Print every stored channel followed by its subscriber count.

        The store's own printout is captured first, the counts are then
        fetched, and each captured entry is printed with its count appended.
        """
        channel_output = capture_stdout(self._channel_store.print)
        self._fetch_subscribers_of_all()

        print_heading('#   Channel (Subscribers)')
        for index, out in enumerate(channel_output):
            print(f'{out} ({self._subscribers[index]})')

    def select_channel(self, channel_no: int):
        """Return the stored channel at 1-based position ``channel_no``."""
        return self._channel_store.list()[channel_no - 1]

    @staticmethod
    def _shorten_count(subs: str):
        """Abbreviate a subscriber count with an 'M' or 'K' suffix.

        Counts of 7+ digits become millions and counts of 4-6 digits become
        thousands; digits are truncated, not rounded. Counts below 1000 are
        returned unchanged as an ``int``.
        """
        digits = len(str(subs))
        count = int(subs)
        if digits > 6:
            scaled = str(count / 10**6)
            # 9+ digits: keep only the integer part; otherwise 4 characters.
            keep = digits - 6 if digits >= 9 else 4
            return scaled[:keep] + 'M'
        if digits > 3:
            scaled = str(count / 10**3)
            # 6 digits: keep only the integer part; otherwise 4 characters.
            keep = 3 if digits == 6 else 4
            return scaled[:keep] + 'K'
        return count

    def subscriber_count(self, channel_id):
        """Fetch and abbreviate the subscriber count of ``channel_id``.

        :return: ``None`` when the channel hides its subscriber count,
            otherwise the abbreviated count (see ``_shorten_count``)
        """
        # self._yt_resource is not working if used directly in multi-threading. Problem not known.
        # Error: malloc(): unsorted double linked list corrupted
        # Using deepcopy it works fine
        yt_response = deepcopy(self._yt_resource).channels().list(
            part='statistics',
            id=channel_id,
        ).execute()

        statistics = yt_response['items'][0]['statistics']
        if statistics['hiddenSubscriberCount']:
            return None
        return self._shorten_count(statistics['subscriberCount'])

    def _subscriber_label(self, channel_id):
        """Return the display value for a channel's subscriber count.

        Only a hidden count (``None``) becomes 'Hidden'; a real count of 0
        is falsy too, but it must be shown as 0.
        """
        subs = self.subscriber_count(channel_id)
        return 'Hidden' if subs is None else subs

    def _fetch_subscribers_of_all(self):
        """Fill ``self._subscribers`` for every stored channel.

        One thread is started per channel; each writes only its own slot of
        the list and ticks the progress bar when done. All threads are
        joined before returning.
        """
        def get_subs(channel_id, index, progress_bar):
            self._subscribers[index] = self._subscriber_label(channel_id)
            progress_bar()

        self._subscribers = [None] * self._channel_store.len

        with alive_bar(total=len(self._subscribers),
                       bar='smooth',
                       spinner='dots_reverse') as bar:
            threads = []
            for index, channel_cache_unit in enumerate(
                    self._channel_store.list()):
                thread = Thread(target=get_subs,
                                args=(channel_cache_unit['id'], index, bar))
                threads.append(thread)
                thread.start()

            for thread in threads:
                thread.join()
コード例 #5
0
class CacheManager:
    """Keeps three ``DataStore`` caches and persists them as JSON files.

    The caches are a local playlist cache, a local channel cache and a
    shared channel cache. Updating a local cache replaces it with a new
    store holding only the given unit; updating the shared channel cache
    adds the unit to the existing store.
    """

    def __init__(self, config) -> None:
        """Set up paths, make sure the cache folders exist and load caches.

        :param config: mapping read for ``local_cache`` and ``shared_cache``
            (folder paths) and ``cache_indent`` (JSON indent)
        """
        self.__local_playlist_cache_path = os.path.join(
            config['local_cache'], 'playlist.json')
        self.__local_channel_cache_path = os.path.join(config['local_cache'],
                                                       'channel.json')
        self.__shared_channel_cache_path = os.path.join(
            config['shared_cache'], 'channel.json')

        self.__indent = config['cache_indent']

        self._local_playlist_cache = DataStore()
        self._local_channel_cache = DataStore()
        self._shared_channel_cache = DataStore()

        self.create_folder(config['local_cache'])
        self.create_folder(config['shared_cache'])

        self.load()

    @property
    def local_playlist_cache(self):
        """The local playlist cache store."""
        return self._local_playlist_cache

    @property
    def local_channel_cache(self):
        """The local channel cache store."""
        return self._local_channel_cache

    @property
    def shared_channel_cache(self):
        """The shared channel cache store."""
        return self._shared_channel_cache

    @staticmethod
    def _create_cache(cache_unit: dict):
        """Return a new ``DataStore`` holding only ``cache_unit``.

        :param cache_unit: mapping with ``title`` and ``id`` keys
        """
        local_cache = DataStore()
        local_cache.update(cache_unit['title'], cache_unit['id'])
        return local_cache

    def update_playlist_cache(self, playlist_cache_unit: dict):
        """Replace the local playlist cache with ``playlist_cache_unit``."""
        self.update_local_playlist_cache(playlist_cache_unit)

    def update_channel_cache(self, channel_cache_unit: dict):
        """Record ``channel_cache_unit`` in both channel caches."""
        self.update_local_channel_cache(channel_cache_unit)
        self.update_shared_channel_cache(channel_cache_unit)

    def update_local_playlist_cache(self, playlist_cache_unit: dict):
        """Replace the local playlist cache with a store of this one unit."""
        self._local_playlist_cache = self._create_cache(playlist_cache_unit)

    def update_local_channel_cache(self, channel_cache_unit: dict):
        """Replace the local channel cache with a store of this one unit."""
        self._local_channel_cache = self._create_cache(channel_cache_unit)

    def update_shared_channel_cache(self, channel_cache_unit: dict):
        """Add ``channel_cache_unit`` to the existing shared channel cache."""
        self._shared_channel_cache.update(channel_cache_unit['title'],
                                          channel_cache_unit['id'])

    def load(self):
        """Load all three caches from their files."""
        self.load_local_playlist_cache()
        self.load_local_channel_cache()
        self.load_shared_channel_cache()

    def load_local_playlist_cache(self):
        """Load the local playlist cache if its file yields any data."""
        r = DataStore.retrieve_from_file(self.__local_playlist_cache_path)
        if r:
            self._local_playlist_cache.load(r)

    def load_local_channel_cache(self):
        """Load the local channel cache if its file yields any data."""
        r = DataStore.retrieve_from_file(self.__local_channel_cache_path)
        if r:
            self._local_channel_cache.load(r)

    def load_shared_channel_cache(self):
        """Load the shared channel cache if its file yields any data."""
        r = DataStore.retrieve_from_file(self.__shared_channel_cache_path)
        if r:
            self._shared_channel_cache.load(r)

    @staticmethod
    def create_folder(folder):
        """Create ``folder`` (and missing parents) unless it already exists.

        ``exist_ok=True`` avoids the check-then-create race of testing
        ``os.path.exists`` first. A path that exists but is not a directory
        raises ``FileExistsError``.
        """
        os.makedirs(folder, exist_ok=True)

    def is_local_playlist_cache_available(self):
        """Return True when the local playlist cache is non-empty."""
        return self._local_playlist_cache.len > 0

    def is_local_channel_cache_available(self):
        """Return True when the local channel cache is non-empty."""
        return self._local_channel_cache.len > 0

    def is_shared_channel_cache_available(self):
        """Return True when the shared channel cache is non-empty."""
        return self._shared_channel_cache.len > 0

    def _dump(self, path, cache_list: list):
        """Write ``cache_list`` to ``path`` as JSON with the set indent."""
        with open(path, 'w') as f:
            json.dump(cache_list, f, indent=self.__indent)

    def save(self):
        """Write all three caches to their files."""
        self.save_local_playlist_cache()
        self.save_local_channel_cache()
        self.save_shared_channel_cache()

    def save_local_playlist_cache(self):
        """Write the local playlist cache to its file."""
        self._dump(self.__local_playlist_cache_path,
                   self.local_playlist_cache.list())

    def save_local_channel_cache(self):
        """Write the local channel cache to its file."""
        self._dump(self.__local_channel_cache_path,
                   self.local_channel_cache.list())

    def save_shared_channel_cache(self):
        """Write the shared channel cache to its file."""
        self._dump(self.__shared_channel_cache_path,
                   self.shared_channel_cache.list())

    def delete_local_playlist_cache(self):
        """Purge the local playlist cache and remove its file if possible."""
        self._local_playlist_cache.purge()
        try:
            os.remove(self.__local_playlist_cache_path)
        except OSError:
            # Best effort: a missing or undeletable file is not an error
            # here, but unrelated exceptions are no longer swallowed.
            pass

    def delete_local_channel_cache(self):
        """Purge the local channel cache and remove its file if possible."""
        self._local_channel_cache.purge()
        try:
            os.remove(self.__local_channel_cache_path)
        except OSError:
            # Best effort: a missing or undeletable file is not an error
            # here, but unrelated exceptions are no longer swallowed.
            pass
コード例 #6
0
 def _create_cache(cache_unit: dict):
     """Return a new ``DataStore`` populated with the single given unit.

     :param cache_unit: mapping read for its ``title`` and ``id`` keys
     """
     store = DataStore()
     title, identifier = cache_unit['title'], cache_unit['id']
     store.update(title, identifier)
     return store