Example #1
def main(model_identifier):
    api = HfApi()
    model_list = api.model_list()
    model_dict = [
        model_dict for model_dict in model_list
        if model_dict.modelId == model_identifier
    ][0]

    model_identifier = "_".join(model_identifier.split("/"))

    http = "https://s3.amazonaws.com/"
    hf_url = "models.huggingface.co/"
    config_path_aws = http + hf_url + model_dict.key
    file_name = "./{}_config.json".format(model_identifier)

    bash_command = "curl {} > {}".format(config_path_aws, file_name)
    os.system(bash_command)

    with open(file_name) as f:
        config_json = json.load(f)

    bash_command = "rm {}".format(file_name)
    os.system(bash_command)

    ##### HERE YOU SHOULD STATE WHICH PARAMS WILL BE CHANGED #####
    # e.g. config_json["decoder_start_token_id"] = 2

    # save config as it was saved before
    with open(file_name, "w") as f:
        json.dump(config_json, f, indent=2, sort_keys=True)

    # upload new config
    bash_command = "aws s3 cp {} s3://{}".format(file_name,
                                                 hf_url + model_dict.key)
    os.system(bash_command)
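The function above assumes os, json, and HfApi are already imported. A minimal sketch of those imports and a hypothetical call; the model id is illustrative only, and the HfApi import path depends on the library version:

import os
import json

# Older releases of transformers' hf_api / huggingface_hub expose HfApi.model_list();
# recent huggingface_hub versions provide HfApi.list_models() instead.
from huggingface_hub import HfApi

# Hypothetical invocation; the model id is only an illustration.
main("facebook/bart-large")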
Example #2
def update_config(model_identifier, updates):
    api = HfApi()
    model_list = api.model_list()
    model_dict = [
        model_dict for model_dict in model_list
        if model_dict.modelId == model_identifier
    ][0]

    model_identifier = "_".join(model_identifier.split("/"))

    http = "https://s3.amazonaws.com/"
    hf_url = "models.huggingface.co/"
    config_path_aws = http + hf_url + model_dict.key
    file_name = f"./{model_identifier}_config.json"

    bash_command = f"curl {config_path_aws} > {file_name}"
    os.system(bash_command)

    with open(file_name) as f:
        config_json = json.load(f)

    bash_command = "rm {}".format(file_name)
    os.system(bash_command)

    ##### HERE YOU SHOULD STATE WHICH PARAMS WILL BE CHANGED #####
    config_json.update(updates)

    # save config as it was saved before
    with open(file_name, "w") as f:
        json.dump(config_json, f, indent=2, sort_keys=True)

    # upload new config
    bash_command = f"s3cmd cp {file_name} s3://{hf_url + model_dict.key}"
    os.system(bash_command)
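A hypothetical call to update_config under the same assumptions as Example #1; the model id and the config keys being changed are illustrative only:

# Sketch only: overwrite two config fields for an illustrative model id,
# then push the edited config back to S3 via the s3cmd call above.
update_config(
    "facebook/bart-large",
    {"decoder_start_token_id": 2, "early_stopping": True},
)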
Example #3
def find_pretrained_model(src_lang: str, tgt_lang: str) -> List[str]:
    """Find models that can accept src_lang as input and return tgt_lang as output."""
    prefix = "Helsinki-NLP/opus-mt-"
    api = HfApi()
    model_list = api.model_list()
    model_ids = [x.modelId for x in model_list if x.modelId.startswith("Helsinki-NLP")]
    src_and_targ = [
        remove_prefix(m, prefix).lower().split("-") for m in model_ids if "+" not in m
    ]  # model ids containing "+" can't be loaded.
    matching = [f"{prefix}{a}-{b}" for (a, b) in src_and_targ if src_lang in a and tgt_lang in b]
    return matching
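remove_prefix is not defined in this snippet; a minimal sketch of such a helper (Python 3.9+ could use str.removeprefix instead), followed by an illustrative call:

# The snippet also assumes `from typing import List` and the HfApi import from Example #1.
def remove_prefix(text: str, prefix: str) -> str:
    # Strip prefix from text when present; otherwise return text unchanged.
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


# Illustrative usage: find Helsinki-NLP models that translate English into German.
print(find_pretrained_model("en", "de"))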
Example #4
def clean_all_community_configs(model_list=None, do_upload=False, do_delete=True):
    api = HfApi()
    model_dict_list = api.model_list()

    if model_list is not None:
        model_dict_list = [model_dict for model_dict in model_dict_list if model_dict.modelId in model_list]
    for i, model_dict in enumerate(model_dict_list):
        model_identifier = model_dict.modelId

        hf_url = 'models.huggingface.co/'
        local_model_path = '_'.join(model_identifier.split('/'))
        path_to_config = "./{}_config.json".format(local_model_path)

        print("\n{}: Summary for {}".format(i, model_identifier))
        print(50 * '-')

        try:
            config = AutoConfig.from_pretrained(model_identifier)
        except Exception as e:
            print('CONF ERROR: {} config cannot be loaded'.format(model_identifier))
            print('Message: {}'.format(e))
            print(50 * '=')
            continue

        # create temp dir
        temp_dir = path_to_config.split('.json')[0]
        if os.path.exists(temp_dir):
            os.system('rm -r {}'.format(temp_dir))
        os.mkdir(temp_dir)

        # save config locally to only use diff
        config.save_pretrained(temp_dir)
        diff_config = AutoConfig.from_pretrained(os.path.join(temp_dir, 'config.json'))

        if config != diff_config:
            print("Author: {} needs to be notified about changed conf: {}".format(model_dict.author, model_identifier))

        if do_upload is True:
            # upload new config
            bash_command = 'aws s3 cp {} s3://{}'.format(os.path.join(temp_dir, 'config.json'), hf_url + model_dict.key)
            os.system(bash_command)

        if do_delete is True:
            # delete saved config
            os.system('rm -r {}'.format(temp_dir))

        print(50 * '=')
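A hypothetical dry run of clean_all_community_configs over a couple of illustrative model ids, without uploading anything:

# Illustrative model ids; do_upload=False keeps this a read-only consistency check,
# and do_delete=True removes the temporary config directories afterwards.
clean_all_community_configs(
    model_list=["facebook/bart-large", "t5-small"],
    do_upload=False,
    do_delete=True,
)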
Example #5
    def test_model_names(self):
        model_list = HfApi().model_list()
        model_ids = [
            x.modelId for x in model_list if x.modelId.startswith(ORG_NAME)
        ]
        bad_model_ids = [mid for mid in model_ids if "+" in mid]
        self.assertListEqual([], bad_model_ids)
        self.assertGreater(len(model_ids), 500)
Example #6
def change_model_list(change_fn,
                      model_list=None,
                      do_upload=False,
                      key_word=None):
    api = HfApi()
    model_dict_list = api.model_list()

    if model_list is not None:
        model_dict_list = [
            model_dict for model_dict in model_dict_list
            if model_dict.modelId in model_list
        ]

    if key_word is not None:
        model_dict_list = [
            model_dict for model_dict in model_dict_list
            if key_word in model_dict.modelId
        ]

    for model_dict in model_dict_list:
        model_identifier = model_dict.modelId

        print("model_identifier")

        http = 'https://s3.amazonaws.com/'
        hf_url = 'models.huggingface.co/'
        config_path_aws = http + hf_url + model_dict.key
        model_identifier = '_'.join(model_identifier.split('/'))
        path_to_config, config_json = download(config_path_aws,
                                               model_identifier)

        config_json = change_fn(config_json)

        # save config as it was saved before
        with open(path_to_config, 'w') as f:
            json.dump(config_json, f, indent=2, sort_keys=True)

        if do_upload is True:
            # upload new config
            bash_command = 'aws s3 cp {} s3://{}'.format(
                path_to_config, hf_url + model_dict.key)
            os.system(bash_command)

            # delete saved config
            os.system('rm {}'.format(path_to_config))
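The download helper used above is assumed to fetch the remote config and return (local_path, parsed_json). A hypothetical change_fn and a dry-run call:

def add_length_penalty(config_json):
    # Illustrative edit: set a generation parameter on every matched config.
    config_json["length_penalty"] = 1.2
    return config_json


# Dry run over every model id containing "bart"; set do_upload=True to push the edits.
change_model_list(add_length_penalty, key_word="bart", do_upload=False)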
Example #7
    def available(self):
        """
        Runs a query to get a list of available language models from the Hugging Face API.

        Returns:
            list of available language name ids
        """

        return set(x.modelId for x in HfApi().model_list() if x.modelId.startswith("Helsinki-NLP"))
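A hypothetical usage of available(); the instance name is illustrative, since the surrounding class is not shown:

# Illustrative usage, assuming `translator` is an instance of the surrounding class.
helsinki_models = translator.available()
print(len(helsinki_models), "Helsinki-NLP models available")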
Example #8
    def test_hub_configs(self):
        """I put require_torch_and_cuda cause I only want this to run with self-scheduled."""

        model_list = HfApi().model_list()
        org = "sshleifer"
        model_ids = [x.modelId for x in model_list if x.modelId.startswith(org)]
        allowed_to_be_broken = ["sshleifer/blenderbot-3B", "sshleifer/blenderbot-90M"]
        failures = []
        for m in model_ids:
            if m in allowed_to_be_broken:
                continue
            try:
                AutoConfig.from_pretrained(m)
            except Exception:
                failures.append(m)
        assert not failures, f"The following models could not be loaded through AutoConfig: {failures}"
Example #9
    def fit(self, inputs=None, wait=True, logs="All", job_name=None, experiment_config=None):
        if self.huggingface_token and wait is True:
            logger.info(f"creating repository {self.base_job_name} on the HF hub")
            self.repo_url = HfApi().create_repo(token=self.huggingface_token, name=self.base_job_name)

        # parent fit method
        super(HuggingFace, self).fit(inputs, wait, logs, job_name, experiment_config)

        if self.huggingface_token and wait is True:
            logger.info(f"downloading model to {self.latest_training_job.name}/ ")
            self.download_model(".", True)

            logger.info(f"initalizing model repository ")
            model_repo = HfRepository(
                repo_url=self.repo_url,
                huggingface_token=self.huggingface_token,
                model_dir=f"./{self.latest_training_job.name}",
            )
            model_repo.init_new_repository()

            logger.info("uploading model files to HF hub")
            model_repo.commit_files_and_push_to_hub()
Example #10
    def __init__(self, args):
        self.args = args
        self._api = HfApi()
Example #11
class HfApiCommonTest(unittest.TestCase):
    _api = HfApi(endpoint=ENDPOINT_STAGING)
Example #12
    def test_model_list(self):
        _api = HfApi()
        models = _api.model_list()
        self.assertGreater(len(models), 100)
        self.assertIsInstance(models[0], ModelInfo)
Example #13
    def test_staging_model_list(self):
        _api = HfApi(endpoint=ENDPOINT_STAGING)
        _ = _api.model_list()
Example #14
def get_all_model_paths():
    api = HfApi()
    model_list = [model_dict.modelId for model_dict in api.model_list()]
    return model_list
Example #15
class HfApiCommonTest(unittest.TestCase):
    _api = HfApi(endpoint="https://moon-staging.huggingface.co")
Example #16
    def test_model_count(self):
        model_list = HfApi().model_list()
        expected_num_models = 1011
        actual_num_models = len(
            [x for x in model_list if x.modelId.startswith("Helsinki-NLP")])
        self.assertEqual(expected_num_models, actual_num_models)