Beispiel #1
0
class ActiveLearner:
    """Rank the unlabeled assets of a project and push the ranking back.

    Wraps a ``Playground`` client bound to a single project id.
    """

    def __init__(self, email, password, api_endpoint, project_id):
        """Authenticate and remember which project to work on.

        Args:
            email: Account email handed to ``KiliAuth``.
            password: Account password handed to ``KiliAuth``.
            api_endpoint: Forwarded to ``KiliAuth`` as ``api_endpoint``.
            project_id: Project whose assets are fetched by this instance.
        """
        credentials = KiliAuth(email, password, api_endpoint=api_endpoint)
        self.playground = Playground(credentials)
        self.project_id = project_id

    def get_assets_to_evaluate(self):
        """Return the project's assets whose ``'labels'`` entry is empty."""
        fetched = self.playground.get_assets(project_id=self.project_id)
        return [
            candidate for candidate in fetched
            if len(candidate['labels']) == 0
        ]

    def prioritize_assets(self, assets, scorer, *args, **kwargs):
        """Order ``assets`` from highest to lowest score.

        Args:
            assets: Assets to rank.
            scorer: Callable invoked as ``scorer(asset, *args, **kwargs)``;
                its return value is the sort key.
            *args: Extra positional arguments forwarded to ``scorer``.
            **kwargs: Extra keyword arguments forwarded to ``scorer``.

        Returns:
            A new list of the assets, best score first. Assets with equal
            scores keep their relative input order.
        """
        scores = [scorer(asset, *args, **kwargs) for asset in assets]
        pairs = list(zip(assets, scores))
        # list.sort is stable, also with reverse=True, so ties stay in order.
        pairs.sort(key=lambda pair: pair[1], reverse=True)
        return [asset for asset, _score in pairs]

    def update_assets_priority(self, assets):
        """Set each asset's priority to its position in ``assets``.

        Issues one ``update_properties_in_asset`` call per asset, wrapped in
        a ``tqdm`` progress bar.

        Returns:
            Always ``True``.
        """
        for position, asset in enumerate(tqdm(assets)):
            self.playground.update_properties_in_asset(
                asset_id=asset['id'], priority=position)
        return True
def main(api_endpoint):
    """Export label counts per project, day and labeler to an Excel file.

    Prompts for an email, a password and a comma-separated list of project
    IDs, fetches every asset of each project, and counts labels grouped by
    (project title, creation date, author email). The result is written to
    ``labeler-stats-<YYYYmmddHHMM>.xlsx`` in the current directory.

    Args:
        api_endpoint: Forwarded to ``KiliAuth`` as ``api_endpoint``.
    """
    email = input('Enter email: ')
    password = getpass.getpass()
    source_project_id = input(
        'Enter project IDs (separate them by "," if you want to provide several): '
    )

    kauth = KiliAuth(email=email, password=password, api_endpoint=api_endpoint)
    playground = Playground(kauth)

    # Collect plain dict rows and build the DataFrame once at the end:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas >= 2.0.
    rows = []
    for raw_project_id in source_project_id.split(','):
        # Tolerate "id1, id2" and stray/trailing commas in the user input.
        project_id = raw_project_id.strip()
        if not project_id:
            continue
        project = playground.get_project(project_id=project_id)
        assets = playground.get_assets(project_id=project_id)
        title = project['title']
        for asset in assets:
            for label in asset['labels']:
                rows.append({
                    'Project': title,
                    # First 10 characters of the timestamp; presumably the
                    # YYYY-MM-DD part of an ISO string -- confirm with API.
                    'Date': label['createdAt'][:10],
                    'Email': label['author']['email'],
                })

    # Passing `columns` keeps the three columns even when no label was found.
    df = pd.DataFrame(rows, columns=['Project', 'Date', 'Email'])
    df_grouped = df.groupby(['Project', 'Date', 'Email']).size()
    # Named `timestamp` rather than `time` to avoid shadowing the module name.
    timestamp = datetime.now().strftime('%Y%m%d%H%M')
    df_grouped.to_excel(f'labeler-stats-{timestamp}.xlsx')
class TransferLearning:
    """Polling loop that alternates between training and inference rounds.

    ``train`` and ``predict`` here only print what they would process;
    the real work is expected to come from overriding them.
    """

    def __init__(self, email, password, api_endpoint, project_id,
                 minimum_number_of_assets_to_launch_training=100):
        """Authenticate and initialise the training bookkeeping.

        Args:
            email: Account email handed to ``KiliAuth``.
            password: Account password handed to ``KiliAuth``.
            api_endpoint: Forwarded to ``KiliAuth`` as ``api_endpoint``.
            project_id: Project whose assets are fetched by this instance.
            minimum_number_of_assets_to_launch_training: Smallest number of
                not-yet-trained assets required before ``train`` is called.
        """
        credentials = KiliAuth(email, password, api_endpoint=api_endpoint)

        self.playground = Playground(credentials)
        self.project_id = project_id
        # Incremented after every training round.
        self.current_training_number = 0
        # Training number that the most recent inference round was based on.
        self.last_training_number = -1
        # One list of asset ids per completed training round.
        self.assets_seen_in_training = []
        self.minimum_number_of_assets_to_launch_training = (
            minimum_number_of_assets_to_launch_training)

    def _current_training_number(self):
        """Return the number of training rounds launched so far."""
        return self.current_training_number

    def get_assets_to_train(self):
        """Return the assets that carry exactly one usable label.

        For each asset the latest ``REVIEWED`` label wins; otherwise a sole
        ``DEFAULT`` label is used. The asset's ``'labels'`` entry is
        overwritten in place with that single label. Assets with several
        ``DEFAULT`` labels and no review are reported on stdout and skipped;
        assets without any such label are skipped silently.
        """
        selected = []
        for asset in self.playground.get_assets(project_id=self.project_id):
            defaults = get_labels_of_types(asset, ['DEFAULT'])
            reviews = get_labels_of_types(asset, ['REVIEWED'])
            if len(reviews) > 0:
                kept_label = reviews[-1]
            elif len(defaults) == 1:
                kept_label = defaults[-1]
            else:
                # No review here, so any remaining defaults are ambiguous.
                if len(defaults) > 0:
                    print(
                        f'Asset {asset["id"]} has several labels: it should be reviewed')
                continue
            asset['labels'] = [kept_label]
            selected.append(asset)

        return selected

    def train(self, assets_to_train):
        """Announce a training round for ``assets_to_train``; returns None."""
        print(
            f'Launch training for {len(assets_to_train)} assets: {[asset["id"] for asset in assets_to_train]}')

    def launch_train(self):
        """Wait, then train on trainable assets not used in earlier rounds.

        Nothing happens unless at least
        ``minimum_number_of_assets_to_launch_training`` new assets exist.
        After training, the round counter is incremented and the ids of the
        trained assets are recorded.
        """
        time.sleep(SECONDS_TO_WAIT)
        fresh_assets = self.get_assets_to_train()
        if len(self.assets_seen_in_training) != 0:
            # Drop every asset whose id appears in any previous round.
            fresh_assets = [
                asset for asset in fresh_assets
                if not any(asset['id'] in earlier_round
                           for earlier_round in self.assets_seen_in_training)
            ]
        if len(fresh_assets) < self.minimum_number_of_assets_to_launch_training:
            return
        self.train(fresh_assets)
        self.current_training_number += 1
        self.assets_seen_in_training.append(
            [asset['id'] for asset in fresh_assets])

    def get_assets_to_predict(self):
        """Return the assets with no ``DEFAULT`` and no ``REVIEWED`` label."""
        return [
            asset
            for asset in self.playground.get_assets(project_id=self.project_id)
            if len(get_labels_of_types(asset, ['DEFAULT', 'REVIEWED'])) == 0
        ]

    def predict(self, assets_to_predict):
        """Announce an inference round for ``assets_to_predict``; returns None."""
        print(
            f'Launch inference for {len(assets_to_predict)} assets: {[asset["id"] for asset in assets_to_predict]}')

    def launch_predict(self):
        """Wait, then run inference if a training happened since the last one."""
        time.sleep(SECONDS_TO_WAIT)
        if self.current_training_number == self.last_training_number:
            print('Inference will not be launched for now...')
            return
        pending = self.get_assets_to_predict()
        if len(pending) == 0:
            return
        # Snapshot the counter before predicting, as the value to record.
        training_number_used = self.current_training_number
        self.predict(pending)
        self.last_training_number = training_number_used

    def launch_tensorboard(self):
        """Start a ``tensorboard`` subprocess reading from the ``runs`` directory."""
        print('Starting Tensorboard...')
        subprocess.Popen(['tensorboard', '--logdir=runs'])
        print('You can access Tensorboard at http://localhost:6006\n')

    def launch(self):
        """Start Tensorboard, then alternate train/predict rounds forever."""
        self.launch_tensorboard()
        while True:
            self.launch_train()
            self.launch_predict()
import getpass
import json

import yaml
from tqdm import tqdm

from kili.authentication import KiliAuth
from kili.playground import Playground

# Interactive script: deletes EVERY asset of the chosen project.

# Ask for the credentials and the target project.
email = input('Enter email: ')
password = getpass.getpass()
project_id = input('Enter project id: ')

# Authenticate against the default API endpoint and open a client.
playground = Playground(KiliAuth(email=email, password=password))

# Fetch the project's assets and delete them one by one with a progress bar.
for doomed_asset in tqdm(playground.get_assets(project_id=project_id)):
    playground.delete_from_dataset(asset_id=doomed_asset['id'])