Beispiel #1
0
class ActiveLearner:
    """Score the unlabeled assets of one project and push the ranking back.

    The instance holds a ``Playground`` client and the id of the project it
    works on. A typical flow is: fetch the unlabeled assets, rank them with a
    caller-supplied scorer, then write the resulting order as priorities.
    """

    def __init__(self, email, password, api_endpoint, project_id):
        """Authenticate and remember the target project.

        Args:
            email: Account email, passed positionally to ``KiliAuth``.
            password: Account password, passed positionally to ``KiliAuth``.
            api_endpoint: Endpoint forwarded to ``KiliAuth``.
            project_id: Project whose assets are queried.
        """
        self.playground = Playground(
            KiliAuth(email, password, api_endpoint=api_endpoint))
        self.project_id = project_id

    def get_assets_to_evaluate(self):
        """Return the project's assets whose ``'labels'`` list is empty."""
        project_assets = self.playground.assets(project_id=self.project_id)
        return [
            candidate for candidate in project_assets
            if len(candidate['labels']) == 0
        ]

    def prioritize_assets(self, assets, scorer, *args, **kwargs):
        """Return ``assets`` ordered by descending score.

        Args:
            assets: Assets to rank.
            scorer: Callable invoked as ``scorer(asset, *args, **kwargs)``;
                its return values must be mutually comparable.
            *args: Extra positional arguments forwarded to ``scorer``.
            **kwargs: Extra keyword arguments forwarded to ``scorer``.

        Returns:
            A new list of the assets, highest score first. Assets with equal
            scores keep their input order (the sort is stable).
        """
        scores = [scorer(asset, *args, **kwargs) for asset in assets]
        scored_pairs = list(zip(assets, scores))
        scored_pairs.sort(key=lambda pair: pair[1], reverse=True)
        return [asset for asset, _score in scored_pairs]

    def update_assets_priority(self, assets):
        """Set each asset's priority to its position in ``assets``.

        One ``update_properties_in_asset`` call is issued per asset, with a
        ``tqdm`` progress bar. Always returns ``True``.
        """
        for position, asset in enumerate(tqdm(assets)):
            self.playground.update_properties_in_asset(
                asset_id=asset['id'], priority=position)
        return True
def main(api_endpoint):
    """Export label counts per project, day and labeler to an Excel file.

    Prompts for credentials and one or more project ids, reads every label of
    every asset in those projects, counts labels per
    (project title, creation date, author email) and writes the counts to
    ``labeler-stats-<YYYYmmddHHMM>.xlsx`` in the current directory.

    Args:
        api_endpoint: Endpoint forwarded to ``KiliAuth``.

    Raises:
        IndexError: If ``playground.projects`` returns an empty result for one
            of the given project ids.
    """
    email = input('Enter email: ')
    password = getpass.getpass()
    source_project_id = input(
        'Enter project IDs (separate them by "," if you want to provide several): '
    )
    # Accept "id1, id2" and stray/trailing commas: surrounding whitespace is
    # not part of an id, and empty entries would only trigger failing queries.
    project_ids = [
        raw_id.strip() for raw_id in source_project_id.split(',')
        if raw_id.strip()
    ]

    kauth = KiliAuth(email=email, password=password, api_endpoint=api_endpoint)
    playground = Playground(kauth)

    # Collect plain dict rows and build the DataFrame once: DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for project_id in project_ids:
        project = playground.projects(project_id=project_id)[0]
        assets = playground.assets(project_id=project_id)
        title = project['title']
        for asset in assets:
            for label in asset['labels']:
                rows.append({
                    'Project': title,
                    # First 10 characters only; presumably the date part of
                    # an ISO timestamp (YYYY-MM-DD) -- TODO confirm.
                    'Date': label['createdAt'][:10],
                    'Email': label['author']['email'],
                })

    # Explicit columns keep the groupby valid even when no label was found.
    df = pd.DataFrame(rows, columns=['Project', 'Date', 'Email'])
    df_grouped = df.groupby(['Project', 'Date', 'Email']).size()
    # Named ``timestamp`` so the stdlib ``time`` module is not shadowed.
    timestamp = datetime.now().strftime('%Y%m%d%H%M')
    df_grouped.to_excel(f'labeler-stats-{timestamp}.xlsx')
Beispiel #3
0
class TransferLearning:
    """Alternate training and inference rounds over the assets of a project.

    ``train`` and ``predict`` only print what they would process; presumably
    they are meant to be overridden with real model code (TODO confirm).
    """

    def __init__(self,
                 api_key,
                 api_endpoint,
                 project_id,
                 number_of_inferences,
                 minimum_number_of_assets_to_launch_training=100):
        """Create the API client and reset the round counters.

        Args:
            api_key: API key forwarded to ``KiliAuth``.
            api_endpoint: Endpoint forwarded to ``KiliAuth``.
            project_id: Project whose assets are trained on / predicted.
            number_of_inferences: ``launch`` stops once this many inference
                rounds have run.
            minimum_number_of_assets_to_launch_training: Minimum count of
                not-yet-trained assets required to start a training round.
        """
        self.playground = Playground(
            KiliAuth(api_key=api_key, api_endpoint=api_endpoint))
        self.project_id = project_id
        self.number_of_inferences = number_of_inferences
        self.minimum_number_of_assets_to_launch_training = (
            minimum_number_of_assets_to_launch_training)

        # Round counters. ``last_training_number`` starts below
        # ``current_training_number`` so the first inference is not skipped.
        self.current_inference_number = 0
        self.current_training_number = 0
        self.last_training_number = -1
        # One list of asset ids per completed training round.
        self.assets_seen_in_training = []

    def _current_training_number(self):
        """Return the number of training rounds launched so far."""
        return self.current_training_number

    def get_assets_to_train(self):
        """Return the assets usable for training, each reduced to one label.

        An asset with at least one REVIEWED label keeps only the last of
        them; otherwise an asset with exactly one DEFAULT label keeps that
        label. Assets with several DEFAULT labels and no REVIEWED label are
        reported on stdout and left out; assets with neither are left out
        silently. The ``'labels'`` entry of each kept asset is overwritten.
        """
        trainable = []
        for asset in self.playground.assets(project_id=self.project_id):
            default_labels = get_labels_of_types(asset, ['DEFAULT'])
            review_labels = get_labels_of_types(asset, ['REVIEWED'])

            if len(review_labels) > 0:
                kept_label = review_labels[-1]
            elif len(default_labels) == 1:
                kept_label = default_labels[-1]
            else:
                # No review label here, so any remaining default labels
                # mean there are two or more of them.
                if len(default_labels) > 0:
                    print(
                        f'Asset {asset["id"]} has several labels: it should be reviewed'
                    )
                continue

            asset['labels'] = [kept_label]
            trainable.append(asset)

        return trainable

    def train(self, assets_to_train):
        """Print the assets a training round would use; return ``None``."""
        print(
            f'Launch training for {len(assets_to_train)} assets: {[asset["id"] for asset in assets_to_train]}'
        )

    def launch_train(self):
        """Wait, then train on assets no earlier round has seen, if enough.

        Sleeps ``SECONDS_TO_WAIT`` seconds, drops every trainable asset whose
        id appears in a previous round, and trains only when at least
        ``minimum_number_of_assets_to_launch_training`` assets remain. On
        training, the round counter is incremented and the ids are recorded.
        """
        time.sleep(SECONDS_TO_WAIT)
        candidates = self.get_assets_to_train()

        if self.assets_seen_in_training:
            fresh_assets = [
                asset for asset in candidates
                if not any(asset['id'] in seen_ids
                           for seen_ids in self.assets_seen_in_training)
            ]
        else:
            fresh_assets = candidates

        if len(fresh_assets) < self.minimum_number_of_assets_to_launch_training:
            return

        self.train(fresh_assets)
        self.current_training_number += 1
        self.assets_seen_in_training.append(
            [asset['id'] for asset in fresh_assets])

    def get_assets_to_predict(self):
        """Return the project's assets with no DEFAULT or REVIEWED label."""
        return [
            asset
            for asset in self.playground.assets(project_id=self.project_id)
            if len(get_labels_of_types(asset, ['DEFAULT', 'REVIEWED'])) == 0
        ]

    def predict(self, assets_to_predict):
        """Print the assets an inference round would use; return ``None``."""
        print(
            f'Launch inference for {len(assets_to_predict)} assets: {[asset["id"] for asset in assets_to_predict]}'
        )

    def launch_predict(self):
        """Wait, then run inference unless no training happened since the last one.

        Sleeps ``SECONDS_TO_WAIT`` seconds. When the training counter equals
        the one recorded at the previous inference, a notice is printed and
        nothing else happens. Otherwise, if some assets lack labels, they are
        predicted and the inference counter is incremented.
        """
        time.sleep(SECONDS_TO_WAIT)
        if self.current_training_number == self.last_training_number:
            print('Inference will not be launched for now...')
            return

        pending = self.get_assets_to_predict()
        if len(pending) == 0:
            return

        # Snapshot before predicting so the recorded value is the training
        # round that was current when inference started.
        training_round = self.current_training_number
        self.predict(pending)
        self.last_training_number = training_round
        self.current_inference_number += 1

    def launch_tensorboard(self):
        """Start a ``tensorboard`` subprocess reading the ``runs`` directory."""
        print('Starting Tensorboard...')
        subprocess.Popen(['tensorboard', '--logdir=runs'])
        print('You can access Tensorboard at http://localhost:6006\n')

    def launch(self):
        """Start Tensorboard, then loop train/predict until enough inferences ran."""
        self.launch_tensorboard()
        while self.current_inference_number < self.number_of_inferences:
            self.launch_train()
            self.launch_predict()
Beispiel #4
0

class Playground(
        kili.mutations.asset.MutationsAsset,
        kili.mutations.label.MutationsLabel,
        kili.mutations.organization.MutationsOrganization,
        kili.mutations.project.MutationsProject,
        kili.mutations.user.MutationsUser,
        kili.queries.asset.QueriesAsset,
        kili.queries.label.QueriesLabel,
        kili.queries.organization.QueriesOrganization,
        kili.queries.project.QueriesProject,
        kili.queries.project_user.QueriesProjectUser,
        kili.queries.user.QueriesUser,
        kili.subscriptions.label.SubscriptionsLabel):
    """Single client class combining the kili mutation, query and subscription classes.

    All behaviour is inherited; this class only stores the authentication
    object and forwards it to the base-class initializers. The order of the
    base classes determines method resolution order, so keep it unchanged.
    """

    def __init__(self, auth=None):
        """Create a Playground bound to ``auth``.

        Args:
            auth: Authentication object shared by all inherited methods;
                presumably a ``KiliAuth`` instance (confirm against callers).
                Defaults to ``None``.
        """
        # Set before delegating so the attribute already exists while the
        # base-class initializers run.
        self.auth = auth
        super().__init__(auth)


if __name__ == '__main__':
    # Example of usage: authenticate with the default KiliAuth settings,
    # fetch the assets of the project "first-project" and print them.
    from kili.authentication import KiliAuth
    from kili.playground import Playground

    playground = Playground(KiliAuth())
    print(playground.assets(project_id="first-project"))
Beispiel #5
0
import getpass
import json

import yaml
from tqdm import tqdm

from kili.authentication import KiliAuth
from kili.playground import Playground

# Ask interactively for the credentials and the project to empty.
email = input('Enter email: ')
password = getpass.getpass()
project_id = input('Enter project id: ')

# Authenticate and build the API client.
kauth = KiliAuth(email=email, password=password)
playground = Playground(kauth)

# WARNING: destructive. Every asset returned for the project is passed to
# delete_many_from_dataset in a single call, with no confirmation prompt.
assets = playground.assets(project_id=project_id)
asset_ids = [project_asset['id'] for project_asset in assets]
playground.delete_many_from_dataset(asset_ids=asset_ids)