Example #1
0
def download(url=None, outfile=None, verbose=True):
    """Download a file from *url* via gdown.

    Parameters
    ----------
    url : str, optional
        Source URL. When ``None``, the function is a no-op.
    outfile : str, optional
        Destination path; defaults to the package tarball location
        (``prisim_path + tarfilename``, defined at module level).
    verbose : bool
        Show gdown's progress output when True.

    Raises
    ------
    TypeError
        If *url* or *outfile* is given but is not a string.
    """
    if url is None:
        return
    if not isinstance(url, str):
        raise TypeError('Input url must be a string')
    if outfile is None:
        outfile = prisim_path+tarfilename
    elif not isinstance(outfile, str):
        raise TypeError('outfile must be a string')
    gdown.download(url, outfile, quiet=(not verbose))
Example #2
0
File: data.py Project: wkentaro/fcn
def cached_download(url, path, md5=None, quiet=False, postprocess=None):
    """Download *url* to *path* unless a valid cached copy already exists.

    When *md5* is given, an existing file is reused only if its checksum
    matches; *postprocess*, if provided, is always invoked on *path*.
    Returns *path*.
    """

    def _md5_matches(path, md5):
        # Announce which checksum is being validated, then compare.
        print('[{:s}] Checking md5 ({:s})'.format(path, md5))
        return md5sum(path) == md5

    cached = False
    if osp.exists(path):
        if not md5:
            # No checksum to verify against — trust the existing file.
            print('[{:s}] File exists ({:s})'.format(path, md5sum(path)))
            cached = True
        elif _md5_matches(path, md5):
            cached = True

    if not cached:
        dirpath = osp.dirname(path)
        if not osp.exists(dirpath):
            os.makedirs(dirpath)
        gdown.download(url, path, quiet=quiet)

    if postprocess is not None:
        postprocess(path)

    return path
Example #3
0
import os, sys
from zipfile import ZipFile
import logging

import gdown  # bug fix: gdown.download is called below but was never imported

logger = logging.getLogger(__name__)

# go to parent directory
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.insert(0, parent_dir)

from utils import *

# Template for turning a Google Drive file id into a direct-download URL.
gdrive_template = "https://drive.google.com/uc?id="
file_name = "frames_data.zip"
file_dir = "frames/data"

if __name__ == "__main__":
    # Google Drive id of the Frames dataset archive.
    file_id = "1OWHtNG8SIxnDagWMyM-7lVshLcZe4p7M"
    url = gdrive_template + file_id

    os.makedirs(file_dir, exist_ok=True)
    file_path = os.path.join(file_dir, file_name)

    logger.info("Downloading Frames file!")

    gdown.download(url, file_path, quiet=False)

    with ZipFile(file_path, 'r') as zipObj:
        # Extract all the contents of zip file in current directory
        zipObj.extractall(file_dir)
Example #4
0
def download_from_drive(url: str, filepath: str):
    """Fetch a dataset file from Google Drive *url* and save it at *filepath*."""
    info(f"Downloading {filepath} dataset from {url}")
    gdown.download(url, filepath, quiet=True)
Example #5
0
from ilm.infer import infill_with_ilm
import gdown

# Variables
MODEL_DIR = 'model/'
MASK_CLS = 'ilm.mask.hierarchical.MaskHierarchical'
result = []  # shared buffer that INFILL.infilling_sentence fills
tokenizer = ilm.tokenize_util.Tokenizer.GPT2

# Pretrained ILM checkpoint: local path and its Google Drive location.
datamodel = 'model/pytorch_model.bin'
model_location = "https://drive.google.com/uc?id=1-12EFaKNBYD1vlfeZcKnV5PaSqeHNTHX"

if os.path.isfile(datamodel):
    # Bug fix: the original evaluated this string without printing it.
    print('Model was already downloaded.')
else:
    gdown.download(model_location, datamodel)

# Create context
context = 'The sun is shining. _ All the children want to swim.'


class INFILL:
    def infilling_sentence(self, context: str):
        """Infill the blank(s) in *context* with the pretrained ILM model.

        Resets the module-level ``result`` buffer, then loads the pickled
        id->token mapping from MODEL_DIR and builds its inverse.

        NOTE(review): this method appears truncated in this excerpt — the
        token-mapping setup below is only the start of the procedure.
        """
        result.clear()
        with open(os.path.join(MODEL_DIR, 'additional_ids_to_tokens.pkl'),
                  'rb') as f:
            additional_ids_to_tokens = pickle.load(f)
        # Invert the mapping so tokens can be looked up by text as well.
        additional_tokens_to_ids = {
            v: k
            for k, v in additional_ids_to_tokens.items()
        }
Example #6
0
import os
import sys
import logging
import gdown
from zipfile import ZipFile

result_url = 'https://drive.google.com/a/g2.nctu.edu.tw/uc?id=1c3ocyO-gDmv-OstM0XbBQdczWGWCANu3&export=download'
result_name = 'results'
if not os.path.isdir(result_name):
    # Fetch the archive, unpack it into ./results, then drop the zip.
    archive = result_name + '.zip'
    gdown.download(result_url, output=archive, quiet=False)
    with ZipFile(archive) as zf:
        zf.extractall(result_name)
    os.remove(result_name + ".zip")

print("Finished downloading results.")
Example #7
0
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import gdown
import os

## TC TL MODEL
# Pretrained tropical-cyclone transfer-learning weights (always refetched).
url = 'https://drive.google.com/uc?id=1Rb9gKSDdLC8y8yMcDqeOHAGj9qA0mDLJ&export=download'
output = '/workspace/python/jupyter_notebook/Tropical_Cyclone_Intensity_Estimation/trained_16.h5'
gdown.download(url, output, quiet=False, proxy=None)

## TC Dataset
# cached_download skips an already-present archive and unpacks it afterwards.
url = 'https://drive.google.com/uc?id=1vMXpbWx_-DO8CNkG68eErzcREvfouT5d&export=download'
output = '/workspace/python/jupyter_notebook/Tropical_Cyclone_Intensity_Estimation/dataset.zip'
gdown.cached_download(url, output, quiet=False, proxy=None,
                      postprocess=gdown.extractall)
Example #8
0
import os
import gdown
import subprocess
from tqdm import trange
from pathlib import Path


if __name__ == '__main__':

    weights_dir = Path(os.path.realpath(__file__)).parent / 'weights'

    # (url, destination) pairs for the pretrained struct2depth / PWC-Net weights.
    # Bug fix: three of the original URL literals began with a stray leading
    # space, which corrupts the request URL passed to gdown.
    downloads = [
        ('https://drive.google.com/uc?id=1mjb4ioDRH8ViGbui52stSUDwhkGrDXy8',
         weights_dir / 'struct2depth_model_kitti.tar.gz'),
        ('https://drive.google.com/uc?id=11SzYIezaF8yaIVKAml7kPdqgncna2vj7',
         weights_dir / 'pwcnet.ckpt-595000.data-00000-of-00001'),
        ('https://drive.google.com/uc?id=1guw6rpVRsO9OfKnuKGGeUY0kpNfJf4yy',
         weights_dir / 'pwcnet.ckpt-595000.index'),
        ('https://drive.google.com/uc?id=1w8DgWut4APWZpprGxPvCbvmg8sJZ11-u',
         weights_dir / 'pwcnet.ckpt-595000.meta'),
    ]

    for i in trange(len(downloads)):
        url, destination = downloads[i]
        gdown.download(url, destination.as_posix(), quiet=False)

    # Unpack the struct2depth archive next to it, then delete the tarball
    # (Path.unlink replaces the original shelled-out `rm`).
    archive = downloads[0][1]
    subprocess.run(f'tar -C {archive.parent.as_posix()} -xf {archive}',
                   shell=True, check=True)
    archive.unlink()
Example #9
0
def init_pretrained_weights(model, key=''):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.

    Args:
        model: torch.nn.Module updated in place via ``load_state_dict``.
        key: lookup key into the module-level ``pretrained_urls`` mapping;
            also used as the basename of the cached checkpoint file.
    """
    import os
    import gdown
    from collections import OrderedDict

    def _get_torch_home():
        # Mirror torch.hub's cache resolution: $TORCH_HOME, else
        # $XDG_CACHE_HOME/torch, else ~/.cache/torch.
        ENV_TORCH_HOME = 'TORCH_HOME'
        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
        DEFAULT_CACHE_DIR = '~/.cache'
        torch_home = os.path.expanduser(
            os.getenv(ENV_TORCH_HOME,
                      os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch')))
        return torch_home

    torch_home = _get_torch_home()
    model_dir = os.path.join(torch_home, 'checkpoints')
    # exist_ok replaces the original try/except-errno.EEXIST dance.
    os.makedirs(model_dir, exist_ok=True)
    filename = key + '_imagenet.pth'
    cached_file = os.path.join(model_dir, filename)

    # Download at most once; later calls reuse the cached checkpoint.
    if not os.path.exists(cached_file):
        gdown.download(pretrained_urls[key], cached_file, quiet=False)

    state_dict = torch.load(cached_file)
    model_dict = model.state_dict()
    new_state_dict = OrderedDict()
    matched_layers, discarded_layers = [], []

    for k, v in state_dict.items():
        if k.startswith('module.'):
            k = k[7:]  # discard the DataParallel 'module.' prefix

        # Keep only weights whose name AND shape match the target model.
        if k in model_dict and model_dict[k].size() == v.size():
            new_state_dict[k] = v
            matched_layers.append(k)
        else:
            discarded_layers.append(k)

    model_dict.update(new_state_dict)
    model.load_state_dict(model_dict)

    if len(matched_layers) == 0:
        warnings.warn(
            'The pretrained weights from "{}" cannot be loaded, '
            'please check the key names manually '
            '(** ignored and continue **)'.format(cached_file))
    else:
        print('Successfully loaded imagenet pretrained weights from "{}"'.format(cached_file))
        if len(discarded_layers) > 0:
            print('** The following layers are discarded '
                  'due to unmatched keys or layer size: {}'.format(discarded_layers))
Example #10
0
def player(pet, filename, q_in, q_out, q_quit, width, height, preparation_time,
           turn_time):
    """Load an AI player (local .py, local .ipynb, or Colab-shared link)
    and run its game loop, exchanging state with the main program via queues.

    Parameters (as used below):
        pet: label of the animal this AI controls (used in error messages).
        filename: path or Colab URL of the AI source file.
        q_in, q_out, q_quit: queues to/from the main program.
        width, height: maze dimensions forwarded to the AI callbacks.
        preparation_time, turn_time: time budgets forwarded to the AI.

    NOTE(review): relies on module-level names defined elsewhere in the file
    (os, sys, time, json, traceback, gdown, ipynb_py_convert, importlib, args).
    NOTE(review): if preprocessing() raises, ``prep_time`` is never bound and
    the final q_out.put falls back to (0, 0) via NameError — confirm intended.
    """
    # If user provides a Google Colab shared link, we use it
    if filename[:4] == "http" and "colab" in filename:
        # Extract the Drive file id from the share URL and download the
        # notebook into the local AIs/ directory.
        file_id = filename.split("/")[-1].split("?")[0]
        url = "https://drive.google.com/uc?id=" + file_id
        base_dir = os.path.dirname(
            os.path.realpath(__file__)) + os.path.sep + "AIs" + os.path.sep
        ipynb_file_name = base_dir + file_id + ".ipynb"
        gdown.download(url, ipynb_file_name)
        with open(ipynb_file_name, "r") as ipynb_file:
            #notebook_name = ipynb_file.read().split(".ipynb")[0].split("name\":\"")[1]
            notebook_json = json.load(ipynb_file)
            notebook_name = notebook_json["metadata"]["colab"]["name"]
            if notebook_name.endswith(".ipynb"):
                notebook_name = notebook_name[:-len(".ipynb")]
        # Convert the notebook to a plain .py module and continue with that.
        py_file_name = base_dir + notebook_name + ".py"
        ipynb_py_convert.convert(ipynb_file_name, py_file_name)
        os.remove(ipynb_file_name)
        filename = py_file_name
    # If user provides a local notebook file
    if filename[-6:] == ".ipynb":
        base_dir = os.path.dirname(
            os.path.realpath(__file__)) + os.path.sep + "AIs" + os.path.sep
        file_id = filename.split(os.path.sep)[-1].split(".ipynb")[0]
        py_file_name = base_dir + file_id + ".py"
        ipynb_py_convert.convert(filename, py_file_name)
        filename = py_file_name
    # We try to launch a regular AI
    try:
        player = importlib.util.spec_from_file_location("player", filename)
        module = importlib.util.module_from_spec(player)
        player.loader.exec_module(module)
        existence = True
    # In case there is a problem, we launch the dummy AI which basically does nothing
    except:
        if filename != "":
            var = traceback.format_exc()
            print("Error: " + var, file=sys.stderr)
            print("Error while loading player controlling " + pet +
                  ", dummy player loaded instead",
                  file=sys.stderr)
        player = importlib.util.spec_from_file_location(
            "player", "resources" + os.path.sep + "imports" + os.path.sep +
            "dummy_player.py")
        module = importlib.util.module_from_spec(player)
        player.loader.exec_module(module)
        existence = False
    # We retrieve the essential parts
    name = filename.split(str(os.path.sep))[-1].split(".")[0]
    preprocessing = module.preprocessing
    turn = module.turn
    # We communicate our name to the main program
    q_out.put(name)
    # And we get useful information in return
    maze, player1_location, player2_location, pieces_of_cheese = q_in.get()
    # Then we call the preprocessing function and catch any exception
    try:
        before = time.time()
        preprocessing(maze, width, height, player1_location, player2_location,
                      pieces_of_cheese, preparation_time)
        after = time.time()
        prep_time = after - before
    except Exception as e:
        traceback.print_exc()
        print(
            e,
            file=sys.stderr,
        )
    # We run each turn through this loop
    try:
        turn_delay = 0
        turn_delay_count = 0
        while 1:
            # We get the new info
            try:
                player1_location, player2_location, score1, score2, pieces_of_cheese = q_in.get(
                )
                # Drain the queue so we always act on the latest game state.
                while not (q_in.empty()):
                    player1_location, player2_location, score1, score2, pieces_of_cheese = q_in.get(
                    )
            except:
                break
            if player1_location == None:
                break
            # Then we check if the main program ask us to exit
            try:
                if q_quit.get():
                    break
            except:
                break
            # We now ask the AI what to do
            if pieces_of_cheese == []:
                break
            try:
                before = time.time()
                decision = turn(maze, width, height, player1_location,
                                player2_location, score1, score2,
                                pieces_of_cheese, turn_time)
                after = time.time()
                # Accumulate per-turn latency to report an average at the end.
                turn_delay = turn_delay + (after - before)
                turn_delay_count = turn_delay_count + 1
            except Exception as e:
                traceback.print_exc()
                print(e, file=sys.stderr)
                decision = ""
            # Finally we send the decision to the main program
            try:
                q_out.put(decision)
            except:
                ()
    except:
        ()
    # Final state read before optional postprocessing.
    player1_location, player2_location, score1, score2, pieces_of_cheese = q_in.get(
    )
    if args.postprocessing:
        try:
            module.postprocessing(maze, width, height, player1_location,
                                  player2_location, score1, score2,
                                  pieces_of_cheese, turn_time)
        except Exception as e:
            traceback.print_exc()
            print(
                e,
                file=sys.stderr,
            )
    # Report timing statistics; fall back to zeros on any failure
    # (e.g. prep_time unbound or turn_delay_count == 0).
    try:
        q_out.put((prep_time, turn_delay / turn_delay_count))
    except:
        q_out.put((0, 0))
import gdown
from fasttext.util import download_model

if __name__ == '__main__':

    # (message, source URL, destination) for each Drive-hosted artefact.
    artefacts = [
        ('Download job offers data',
         'https://drive.google.com/uc?export=download&confirm=A6wL&id=1tI4SctLNkZU6vJuBw1Hf1lVqephc35cG',
         'data/all_offers.csv'),
        ('Download FastText representations of job offers',
         'https://drive.google.com/uc?export=download&confirm=-GH4&id=1m_ckxOk4Ga884ai9mopnSj7gmvb1t5tG',
         'data/offers_fasttext.npy'),
    ]
    for message, src, dst in artefacts:
        print(message)
        gdown.download(src, dst, quiet=False)

    # The French FastText model is fetched by fasttext itself; skip if cached.
    print('Download FastText French model')
    download_model('fr', if_exists='ignore')
Example #12
0
def find(img_path,
         db_path,
         model_name='VGG-Face',
         distance_metric='cosine',
         model=None,
         enforce_detection=True,
         detector_backend='opencv'):
    """Search *db_path* for face images matching *img_path*.

    Builds (or reloads from a pickle) a representation for every ``.jpg``
    under *db_path*, embeds the query image(s) with the requested model,
    and returns a pandas DataFrame of candidates sorted by distance — or a
    list of DataFrames when *img_path* is a list. ``model_name='Ensemble'``
    combines four base models and scores pairs with a LightGBM classifier.

    Raises ValueError for an unknown model/metric, an empty database
    folder, or a *db_path* that is not a directory.

    NOTE(review): relies on module-level names defined elsewhere in the
    file (VGGFace, Facenet, OpenFace, FbDeepFace, DeepID, functions, dst,
    tqdm, pd, np, pickle, path, Path, os, time, gdown).
    """
    model_names = ['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace']
    metric_names = ['cosine', 'euclidean', 'euclidean_l2']

    tic = time.time()

    # Normalize the query to a list so single and bulk calls share one path.
    if type(img_path) == list:
        bulkProcess = True
        img_paths = img_path.copy()
    else:
        bulkProcess = False
        img_paths = [img_path]

    if os.path.isdir(db_path) == True:

        # ---------------------------------------

        # Build the embedding model(s) unless the caller passed one in.
        if model == None:
            if model_name == 'VGG-Face':
                print("Using VGG-Face model backend and", distance_metric,
                      "distance.")
                model = VGGFace.loadModel()
            elif model_name == 'OpenFace':
                print("Using OpenFace model backend", distance_metric,
                      "distance.")
                model = OpenFace.loadModel()
            elif model_name == 'Facenet':
                print("Using Facenet model backend", distance_metric,
                      "distance.")
                model = Facenet.loadModel()
            elif model_name == 'DeepFace':
                print("Using FB DeepFace model backend", distance_metric,
                      "distance.")
                model = FbDeepFace.loadModel()
            elif model_name == 'DeepID':
                print("Using DeepID model backend", distance_metric,
                      "distance.")
                model = DeepID.loadModel()
            elif model_name == 'Dlib':
                print("Using Dlib ResNet model backend", distance_metric,
                      "distance.")
                from deepface.basemodels.DlibResNet import DlibResNet  # this is not a must because it is very huge
                model = DlibResNet()
            elif model_name == 'Ensemble':
                print("Ensemble learning enabled")
                # TODO: include DeepID in ensemble method

                import lightgbm as lgb  # lightgbm==2.3.1

                models = {}

                pbar = tqdm(range(0, len(model_names)),
                            desc='Face recognition models')

                # Load all four base models for the ensemble.
                for index in pbar:
                    if index == 0:
                        pbar.set_description("Loading VGG-Face")
                        models['VGG-Face'] = VGGFace.loadModel()
                    elif index == 1:
                        pbar.set_description("Loading FaceNet")
                        models['Facenet'] = Facenet.loadModel()
                    elif index == 2:
                        pbar.set_description("Loading OpenFace")
                        models['OpenFace'] = OpenFace.loadModel()
                    elif index == 3:
                        pbar.set_description("Loading DeepFace")
                        models['DeepFace'] = FbDeepFace.loadModel()

            else:
                raise ValueError("Invalid model_name passed - ", model_name)
        else:  # model != None
            print("Already built model is passed")

            if model_name == 'Ensemble':

                import lightgbm as lgb  # lightgbm==2.3.1

                # validate model dictionary because it might be passed from input as pre-trained

                found_models = []
                for key, value in model.items():
                    found_models.append(key)

                if ('VGG-Face'
                        in found_models) and ('Facenet' in found_models) and (
                            'OpenFace' in found_models) and ('DeepFace'
                                                             in found_models):
                    print("Ensemble learning will be applied for ",
                          found_models, " models")
                else:
                    raise ValueError(
                        "You would like to apply ensemble learning and pass pre-built models but models must contain [VGG-Face, Facenet, OpenFace, DeepFace] but you passed "
                        + found_models)

                models = model.copy()

        # threshold = functions.findThreshold(model_name, distance_metric)

        # ---------------------------------------

        # Representations are cached per model in a pickle inside db_path.
        file_name = "representations_%s.pkl" % (model_name)
        file_name = file_name.replace("-", "_").lower()

        if path.exists(db_path + "/" + file_name):

            print(
                "WARNING: Representations for images in ", db_path,
                " folder were previously stored in ", file_name,
                ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again."
            )

            f = open(db_path + '/' + file_name, 'rb')
            representations = pickle.load(f)

            print("There are ", len(representations),
                  " representations found in ", file_name)

        else:
            employees = []

            # Collect every .jpg under db_path (recursively).
            for r, d, f in os.walk(
                    db_path):  # r=root, d=directories, f = files
                for file in f:
                    if ('.jpg' in file):
                        exact_path = r + "/" + file
                        employees.append(exact_path)

            if len(employees) == 0:
                raise ValueError("There is no image in ", db_path, " folder!")

            # ------------------------
            # find representations for db images

            representations = []

            pbar = tqdm(range(0, len(employees)),
                        desc='Finding representations')

            # for employee in employees:
            for index in pbar:
                employee = employees[index]

                if model_name != 'Ensemble':

                    if model_name == 'Dlib':  # non-keras model
                        input_shape = (150, 150, 3)
                    else:
                        # input_shape = model.layers[0].input_shape[1:3] #my environment returns (None, 224, 224, 3) but some people mentioned that they got [(None, 224, 224, 3)]. I think this is because of version issue.

                        input_shape = model.layers[0].input_shape

                        if type(input_shape) == list:
                            input_shape = input_shape[0][1:3]
                        else:
                            input_shape = input_shape[1:3]

                    # ---------------------

                    input_shape_x = input_shape[0]
                    input_shape_y = input_shape[1]

                    img = functions.preprocess_face(
                        img=employee,
                        target_size=(input_shape_y, input_shape_x),
                        enforce_detection=enforce_detection,
                        detector_backend=detector_backend)
                    representation = model.predict(img)[0, :]

                    instance = []
                    instance.append(employee)
                    instance.append(representation)

                else:  # ensemble learning

                    # One row per image: [path, rep1, rep2, rep3, rep4].
                    instance = []
                    instance.append(employee)

                    for j in model_names:
                        ensemble_model = models[j]

                        # input_shape = model.layers[0].input_shape[1:3] #my environment returns (None, 224, 224, 3) but some people mentioned that they got [(None, 224, 224, 3)]. I think this is because of version issue.

                        input_shape = ensemble_model.layers[0].input_shape

                        if type(input_shape) == list:
                            input_shape = input_shape[0][1:3]
                        else:
                            input_shape = input_shape[1:3]

                        input_shape_x = input_shape[0]
                        input_shape_y = input_shape[1]

                        img = functions.preprocess_face(
                            img=employee,
                            target_size=(input_shape_y, input_shape_x),
                            enforce_detection=enforce_detection,
                            detector_backend=detector_backend)
                        representation = ensemble_model.predict(img)[0, :]
                        instance.append(representation)

                # -------------------------------

                representations.append(instance)

            # Persist the representations so the next call can skip this pass.
            f = open(db_path + '/' + file_name, "wb")
            pickle.dump(representations, f)
            f.close()

            print(
                "Representations stored in ", db_path, "/", file_name,
                " file. Please delete this file when you add new identities in your database."
            )

        # ----------------------------
        # we got representations for database

        if model_name != 'Ensemble':
            df = pd.DataFrame(representations,
                              columns=["identity", "representation"])
        else:  # ensemble learning
            df = pd.DataFrame(representations,
                              columns=[
                                  "identity", "VGG-Face_representation",
                                  "Facenet_representation",
                                  "OpenFace_representation",
                                  "DeepFace_representation"
                              ])

        # Pristine copy restored after each query so distance columns don't accumulate.
        df_base = df.copy()

        resp_obj = []

        global_pbar = tqdm(range(0, len(img_paths)), desc='Analyzing')
        for j in global_pbar:
            img_path = img_paths[j]

            # find representation for passed image

            if model_name == 'Ensemble':
                # Compute one distance column per (model, metric) pair.
                for j in model_names:
                    ensemble_model = models[j]

                    # input_shape = ensemble_model.layers[0].input_shape[1:3] #my environment returns (None, 224, 224, 3) but some people mentioned that they got [(None, 224, 224, 3)]. I think this is because of version issue.

                    input_shape = ensemble_model.layers[0].input_shape

                    if type(input_shape) == list:
                        input_shape = input_shape[0][1:3]
                    else:
                        input_shape = input_shape[1:3]

                    img = functions.preprocess_face(
                        img=img_path,
                        target_size=input_shape,
                        enforce_detection=enforce_detection,
                        detector_backend=detector_backend)
                    target_representation = ensemble_model.predict(img)[0, :]

                    for k in metric_names:
                        distances = []
                        for index, instance in df.iterrows():
                            source_representation = instance[
                                "%s_representation" % (j)]

                            if k == 'cosine':
                                distance = dst.findCosineDistance(
                                    source_representation,
                                    target_representation)
                            elif k == 'euclidean':
                                distance = dst.findEuclideanDistance(
                                    source_representation,
                                    target_representation)
                            elif k == 'euclidean_l2':
                                distance = dst.findEuclideanDistance(
                                    dst.l2_normalize(source_representation),
                                    dst.l2_normalize(target_representation))

                            distances.append(distance)

                        # OpenFace/euclidean is excluded from the ensemble features.
                        if j == 'OpenFace' and k == 'euclidean':
                            continue
                        else:
                            df["%s_%s" % (j, k)] = distances

                # ----------------------------------

                feature_names = []
                for j in model_names:
                    for k in metric_names:
                        if j == 'OpenFace' and k == 'euclidean':
                            continue
                        else:
                            feature = '%s_%s' % (j, k)
                            feature_names.append(feature)

                # print(df[feature_names].head())

                x = df[feature_names].values

                # ----------------------------------
                # lightgbm model
                home = str(Path.home())

                # Fetch the pre-trained ensemble booster on first use.
                if os.path.isfile(
                        home +
                        '/.deepface/weights/face-recognition-ensemble-model.txt'
                ) != True:
                    print(
                        "face-recognition-ensemble-model.txt will be downloaded..."
                    )
                    url = 'https://raw.githubusercontent.com/serengil/deepface/master/deepface/models/face-recognition-ensemble-model.txt'
                    output = home + '/.deepface/weights/face-recognition-ensemble-model.txt'
                    gdown.download(url, output, quiet=False)

                ensemble_model_path = home + '/.deepface/weights/face-recognition-ensemble-model.txt'

                deepface_ensemble = lgb.Booster(model_file=ensemble_model_path)

                y = deepface_ensemble.predict(x)

                # Class 1 means "same person"; keep its probability as the score.
                verified_labels = []
                scores = []
                for i in y:
                    verified = np.argmax(i) == 1
                    score = i[np.argmax(i)]

                    verified_labels.append(verified)
                    scores.append(score)

                df['verified'] = verified_labels
                df['score'] = scores

                df = df[df.verified == True]
                # df = df[df.score > 0.99] #confidence score
                df = df.sort_values(by=["score"],
                                    ascending=False).reset_index(drop=True)
                df = df[['identity', 'verified', 'score']]

                resp_obj.append(df)
                df = df_base.copy()  # restore df for the next iteration

            # ----------------------------------

            if model_name != 'Ensemble':

                if model_name == 'Dlib':  # non-keras model
                    input_shape = (150, 150, 3)
                else:
                    # input_shape = model.layers[0].input_shape[1:3] #my environment returns (None, 224, 224, 3) but some people mentioned that they got [(None, 224, 224, 3)]. I think this is because of version issue.

                    input_shape = model.layers[0].input_shape

                    if type(input_shape) == list:
                        input_shape = input_shape[0][1:3]
                    else:
                        input_shape = input_shape[1:3]

                # ------------------------

                input_shape_x = input_shape[0]
                input_shape_y = input_shape[1]

                img = functions.preprocess_face(
                    img=img_path,
                    target_size=(input_shape_y, input_shape_x),
                    enforce_detection=enforce_detection,
                    detector_backend=detector_backend)
                target_representation = model.predict(img)[0, :]

                distances = []
                for index, instance in df.iterrows():
                    source_representation = instance["representation"]

                    if distance_metric == 'cosine':
                        distance = dst.findCosineDistance(
                            source_representation, target_representation)
                    elif distance_metric == 'euclidean':
                        distance = dst.findEuclideanDistance(
                            source_representation, target_representation)
                    elif distance_metric == 'euclidean_l2':
                        distance = dst.findEuclideanDistance(
                            dst.l2_normalize(source_representation),
                            dst.l2_normalize(target_representation))
                    else:
                        raise ValueError("Invalid distance_metric passed - ",
                                         distance_metric)

                    distances.append(distance)

                # Keep only candidates within the per-model/metric threshold.
                threshold = functions.findThreshold(model_name,
                                                    distance_metric)

                df["distance"] = distances
                df = df.drop(columns=["representation"])
                df = df[df.distance <= threshold]

                df = df.sort_values(by=["distance"],
                                    ascending=True).reset_index(drop=True)
                resp_obj.append(df)
                df = df_base.copy()  # restore df for the next iteration

        toc = time.time()

        print("find function lasts ", toc - tic, " seconds")

        # Single query: return the DataFrame directly instead of a list.
        if len(resp_obj) == 1:
            return resp_obj[0]

        return resp_obj

    else:
        raise ValueError("Passed db_path does not exist!")

    return None
Example #13
0
def verify(img1_path,
           img2_path='',
           model_name='VGG-Face',
           distance_metric='cosine',
           model=None,
           enforce_detection=True,
           detector_backend='ssd'):
    """Verify whether two face images belong to the same person.

    Parameters
    ----------
    img1_path : str or list
        Path to the first image, or a list of ``[img1, img2]`` pairs for
        bulk processing.
    img2_path : str
        Path to the second image (ignored when ``img1_path`` is a list).
    model_name : str
        'VGG-Face', 'OpenFace', 'Facenet', 'DeepFace', 'DeepID', 'Dlib'
        or 'Ensemble' (which combines the first four).
    distance_metric : str
        'cosine', 'euclidean' or 'euclidean_l2'.
    model : keras model or dict, optional
        Pre-built model (for 'Ensemble': a dict of the four models) so
        weights are not re-loaded on every call.
    enforce_detection : bool
        Passed through to face preprocessing; raise if no face is found.
    detector_backend : str
        Face detector backend ('opencv', 'ssd', 'dlib' or 'mtcnn').

    Returns
    -------
    dict
        Verification result for a single pair, or a ``{"pair_N": ...}``
        dict when a list of pairs was supplied.

    Raises
    ------
    ValueError
        On an unknown model name / distance metric, an incomplete
        ensemble model dict, or a malformed input pair.
    """
    tic = time.time()

    # Bulk mode: caller passed a list of [img1, img2] pairs.
    if type(img1_path) == list:
        bulkProcess = True
        img_list = img1_path.copy()
    else:
        bulkProcess = False
        img_list = [[img1_path, img2_path]]

    # ------------------------------

    resp_objects = []

    if model_name == 'Ensemble':
        print("Ensemble learning enabled")

        import lightgbm as lgb  # lightgbm==2.3.1

        if model is None:
            model = {}

            model_pbar = tqdm(range(0, 4), desc='Face recognition models')

            for index in model_pbar:

                if index == 0:
                    model_pbar.set_description("Loading VGG-Face")
                    model["VGG-Face"] = VGGFace.loadModel()
                elif index == 1:
                    model_pbar.set_description("Loading Google FaceNet")
                    model["Facenet"] = Facenet.loadModel()
                elif index == 2:
                    model_pbar.set_description("Loading OpenFace")
                    model["OpenFace"] = OpenFace.loadModel()
                elif index == 3:
                    model_pbar.set_description("Loading Facebook DeepFace")
                    model["DeepFace"] = FbDeepFace.loadModel()

        # --------------------------
        # validate model dictionary because it might be passed from input as pre-trained

        found_models = list(model.keys())
        required_models = ["VGG-Face", "Facenet", "OpenFace", "DeepFace"]

        if all(name in found_models for name in required_models):
            print("Ensemble learning will be applied for ", found_models,
                  " models")
        else:
            # NOTE: the original concatenated the list itself to the message,
            # which raised TypeError instead of this intended ValueError.
            raise ValueError(
                "You would like to apply ensemble learning and pass pre-built models but models must contain [VGG-Face, Facenet, OpenFace, DeepFace] but you passed "
                + str(found_models))

        # --------------------------

        model_names = ["VGG-Face", "Facenet", "OpenFace", "DeepFace"]
        metrics = ["cosine", "euclidean", "euclidean_l2"]

        # --------------------------
        # The LightGBM decision model is loop-invariant: download (if
        # missing) and load it once instead of once per image pair.

        home = str(Path.home())
        ensemble_model_path = home + '/.deepface/weights/face-recognition-ensemble-model.txt'

        if not os.path.isfile(ensemble_model_path):
            print(
                "face-recognition-ensemble-model.txt will be downloaded..."
            )
            url = 'https://raw.githubusercontent.com/serengil/deepface/master/deepface/models/face-recognition-ensemble-model.txt'
            gdown.download(url, ensemble_model_path, quiet=False)

        deepface_ensemble = lgb.Booster(model_file=ensemble_model_path)

        # --------------------------

        pbar = tqdm(range(0, len(img_list)), desc='Verification')

        for index in pbar:
            instance = img_list[index]

            if type(instance) == list and len(instance) >= 2:
                img1_path = instance[0]
                img2_path = instance[1]

                ensemble_features = []

                for i in model_names:
                    custom_model = model[i]

                    # some keras versions return [(None, h, w, 3)] while
                    # others return (None, h, w, 3) for the input shape
                    input_shape = custom_model.layers[0].input_shape

                    if type(input_shape) == list:
                        input_shape = input_shape[0][1:3]
                    else:
                        input_shape = input_shape[1:3]

                    img1 = functions.preprocess_face(
                        img=img1_path,
                        target_size=input_shape,
                        enforce_detection=enforce_detection,
                        detector_backend=detector_backend)
                    img2 = functions.preprocess_face(
                        img=img2_path,
                        target_size=input_shape,
                        enforce_detection=enforce_detection,
                        detector_backend=detector_backend)

                    img1_representation = custom_model.predict(img1)[0, :]
                    img2_representation = custom_model.predict(img2)[0, :]

                    for j in metrics:
                        if j == 'cosine':
                            distance = dst.findCosineDistance(
                                img1_representation, img2_representation)
                        elif j == 'euclidean':
                            distance = dst.findEuclideanDistance(
                                img1_representation, img2_representation)
                        elif j == 'euclidean_l2':
                            distance = dst.findEuclideanDistance(
                                dst.l2_normalize(img1_representation),
                                dst.l2_normalize(img2_representation))

                        # OpenFace + euclidean duplicates OpenFace +
                        # euclidean_l2, so that combination is skipped
                        if i == 'OpenFace' and j == 'euclidean':
                            continue

                        ensemble_features.append(distance)

                # -------------------------------

                prediction = deepface_ensemble.predict(
                    np.expand_dims(np.array(ensemble_features), axis=0))[0]

                verified = np.argmax(prediction) == 1
                score = prediction[np.argmax(prediction)]

                # build the response directly as a dict (the original
                # hand-built a JSON string and re-parsed it)
                resp_obj = {
                    "verified": bool(verified),
                    "score": float(score),
                    "distance": [float(d) for d in ensemble_features],
                    "model": ["VGG-Face", "Facenet", "OpenFace", "DeepFace"],
                    "similarity_metric":
                    ["cosine", "euclidean", "euclidean_l2"]
                }

                if bulkProcess:
                    resp_objects.append(resp_obj)
                else:
                    return resp_obj

            # -------------------------------

        if bulkProcess:
            resp_obj = {}
            for i in range(0, len(resp_objects)):
                resp_obj["pair_" + str(i + 1)] = resp_objects[i]
            return resp_obj

        return None

    # ensemble learning block end
    # --------------------------------
    # ensemble learning disabled

    if model is None:
        if model_name == 'VGG-Face':
            print("Using VGG-Face model backend and", distance_metric,
                  "distance.")
            model = VGGFace.loadModel()

        elif model_name == 'OpenFace':
            print("Using OpenFace model backend", distance_metric, "distance.")
            model = OpenFace.loadModel()

        elif model_name == 'Facenet':
            print("Using Facenet model backend", distance_metric, "distance.")
            model = Facenet.loadModel()

        elif model_name == 'DeepFace':
            print("Using FB DeepFace model backend", distance_metric,
                  "distance.")
            model = FbDeepFace.loadModel()

        elif model_name == 'DeepID':
            print("Using DeepID2 model backend", distance_metric, "distance.")
            model = DeepID.loadModel()

        elif model_name == 'Dlib':
            print("Using Dlib ResNet model backend", distance_metric,
                  "distance.")
            from deepface.basemodels.DlibResNet import DlibResNet  # this is not a must because it is very huge.
            model = DlibResNet()

        else:
            raise ValueError("Invalid model_name passed - ", model_name)
    else:  # model is not None
        print("Already built model is passed")

    # ------------------------------
    # face recognition models have different size of inputs
    # some keras versions return [(None, h, w, 3)] instead of (None, h, w, 3)

    if model_name == 'Dlib':  # this is not a regular keras model
        input_shape = (150, 150, 3)

    else:  # keras based models
        input_shape = model.layers[0].input_shape

        if type(input_shape) == list:
            input_shape = input_shape[0][1:3]
        else:
            input_shape = input_shape[1:3]

    input_shape_x = input_shape[0]
    input_shape_y = input_shape[1]

    # ------------------------------

    # tuned thresholds for model and metric pair
    threshold = functions.findThreshold(model_name, distance_metric)

    # ------------------------------

    # calling deepface in a for loop causes lots of progress bars. this prevents it.
    disable_option = False if len(img_list) > 1 else True

    pbar = tqdm(range(0, len(img_list)),
                desc='Verification',
                disable=disable_option)

    for index in pbar:

        instance = img_list[index]

        if type(instance) == list and len(instance) >= 2:
            img1_path = instance[0]
            img2_path = instance[1]

            # ----------------------
            # crop and align faces

            img1 = functions.preprocess_face(
                img=img1_path,
                target_size=(input_shape_y, input_shape_x),
                enforce_detection=enforce_detection,
                detector_backend=detector_backend)
            img2 = functions.preprocess_face(
                img=img2_path,
                target_size=(input_shape_y, input_shape_x),
                enforce_detection=enforce_detection,
                detector_backend=detector_backend)

            # ----------------------
            # find embeddings

            img1_representation = model.predict(img1)[0, :]
            img2_representation = model.predict(img2)[0, :]

            # ----------------------
            # find distances between embeddings

            if distance_metric == 'cosine':
                distance = dst.findCosineDistance(img1_representation,
                                                  img2_representation)
            elif distance_metric == 'euclidean':
                distance = dst.findEuclideanDistance(img1_representation,
                                                     img2_representation)
            elif distance_metric == 'euclidean_l2':
                distance = dst.findEuclideanDistance(
                    dst.l2_normalize(img1_representation),
                    dst.l2_normalize(img2_representation))
            else:
                raise ValueError("Invalid distance_metric passed - ",
                                 distance_metric)

            # ----------------------
            # decision and response object (built directly as a dict; the
            # original serialized a JSON string and re-parsed it)

            resp_obj = {
                "verified": bool(distance <= threshold),
                "distance": float(distance),
                "max_threshold_to_verify": float(threshold),
                "model": model_name,
                "similarity_metric": distance_metric
            }

            if bulkProcess:
                resp_objects.append(resp_obj)
            else:
                # K.clear_session()
                return resp_obj
        # ----------------------

        else:
            raise ValueError("Invalid arguments passed to verify function: ",
                             instance)

    # -------------------------

    toc = time.time()

    # print("identification lasts ",toc-tic," seconds")

    if bulkProcess:
        resp_obj = {}
        for i in range(0, len(resp_objects)):
            resp_obj["pair_" + str(i + 1)] = resp_objects[i]
        return resp_obj
Example #14
0
import pandas as pd
import os
import gdown

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn import linear_model
from sklearn.model_selection import train_test_split

# Fetch the heart-disease CSV from Google Drive. The third positional
# argument is gdown's `quiet` flag, so the download runs silently.
gdown.download(
    'https://drive.google.com/uc?id=12WcGMubUMHGuS5-wj3PadyQIsJiZZTcd',
    'heart_data.csv', True)

data_path = 'heart_data.csv'
heart_data = pd.read_csv(data_path)
heart_data = heart_data.dropna()  # remove rows with missing values

# Exploratory plots: outcome vs ST depression, then resting BP vs cholesterol.
sns.catplot(x='target', y='oldpeak', data=heart_data)
plt.show()

sns.scatterplot(x='trestbps', y='chol', data=heart_data)
plt.show()

# 60/40 train/test split with a fixed seed for reproducibility.
train_df, test_df = train_test_split(heart_data, test_size=0.4, random_state=1)
# Feature columns used as model inputs; 'target' is the label column.
input_labels = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'exang', 'oldpeak', 'ca',
    'thal'
]
output_labels = 'target'
Example #15
0
def dl_unzip(frm, name):
    """Download a zip archive (if not already cached) and extract it.

    Parameters
    ----------
    frm : str
        Source URL passed to gdown.
    name : str
        Base name of the archive (without the '.zip' suffix), resolved
        relative to the module-level ``model_path``.
    """
    zfpath = str(model_path) + name + '.zip'
    if not os.path.exists(zfpath):
        # reuse zfpath: the original rebuilt the path as
        # `model_path + name + '.zip'` (without str()), which diverges
        # from the existence check and fails when model_path is a Path.
        gdown.download(frm, zfpath, quiet=False)
    with zipfile.ZipFile(zfpath, 'r') as zf:
        zf.extractall(path=model_path)
Example #16
0
#!/usr/bin/env python3

# Copied from a notebook by the ML profs

import pandas as pd
import gdown
from sklearn.feature_extraction.text import CountVectorizer
import nltk

import os  # needed for the existence check below; missing from the original imports

# Download the news-articles CSV only on first run.
if not os.path.exists("articles1.csv"):
    gdown.download(
        "https://drive.google.com/uc?authuser=0&id=1T8V87Hdz2IvhKjzwzKyLWA4vI6sA2wTX&export=download",
        "articles1.csv",
        quiet=False,
    )
df = pd.read_csv("articles1.csv")

# NLTK has a built-in module for extracting words from text.
# This takes a few minutes to run, so be patient.

nltk.download("punkt")


def remove_punctuation(article):
    """Drop punctuation-only tokens from *article*.

    Tokenizes the text and keeps only tokens containing at least one
    alphabetic character, returning them re-joined with spaces.
    """
    # word_tokenize mishandles the curly apostrophe, so normalize it first
    normalized = article.replace("’", "'")
    tokens = nltk.tokenize.word_tokenize(normalized)
    kept = [tok for tok in tokens if any(ch.isalpha() for ch in tok)]
    return " ".join(kept)

Example #17
0
def download_weights(url, output, verbose=False):
    """Download a weights file from *url* to *output* via gdown.

    gdown's flag is ``quiet`` -- the inverse of verbose. The original
    passed ``quiet=verbose``, which silenced output exactly when
    ``verbose=True`` was requested.
    """
    gdown.download(url, output, quiet=not verbose)
Example #18
0
def create_collection_for(ws, overwrite=False):
    """Build a media collection from one spreadsheet worksheet.

    For each row (title, description, authors, drive-url) in *ws*:
    download the referenced Google Drive video (unless cached),
    re-encode it to webm, record it in a collection manifest, then
    write ``collection.json`` and pack the whole directory into a
    ZIP next to the data directory.

    Parameters
    ----------
    ws : worksheet
        Spreadsheet sheet with columns title/description/authors/url.
    overwrite : bool
        Re-download files and re-create the ZIP even if present.
    """
    name = slugify(ws.title)
    print(f"{ws.title} -> {name}")

    data_dir = DATA_DIR / name
    data_dir.mkdir(exist_ok=True)

    items = []

    # header is row 1; data rows start at 2
    for c_title, c_desc, c_auth, c_url in ws.iter_rows(min_row=2, max_col=4):
        # a blank title marks the end of the data rows
        if c_title.value is None:
            break

        print(c_title.value)

        # retrieve google drive ID from field
        # in-doc url fmt: https://docs.google.com/uc?id=[FILE_ID]&export=download
        # actual url fmt: https://drive.google.com/file/d/<id>/view?usp=sharing
        gid = re.search(r"([a-zA-Z0-9\_\-]+)/view", c_url.value).groups()[-1]

        # retrieve expected filename from map
        fname = FILENAMES_MAP.get(gid)
        fpath = data_dir / fname if fname else None
        # download if we dont already have it
        if not fname or (fpath and not fpath.exists()) or overwrite:
            # gdown picks the filename itself when output is a directory
            fpath = pathlib.Path(
                gdown.download(f"https://drive.google.com/uc?id={gid}",
                               output=f"{data_dir}/"))
            orig_fpath = data_dir / fpath.relative_to(data_dir).name
            fpath = orig_fpath.with_suffix(".webm")
            # reencode video to webm
            reencode(
                orig_fpath,
                fpath,
                VideoWebmHigh().to_ffmpeg_args(),
                delete_src=True,
            )

            # remember gid -> final filename so future runs can skip the download
            update_map(gid, fpath.relative_to(data_dir).name)
        else:
            print("  Skipping download")

        # update collection
        items.append({
            "title": c_title.value,
            "description": c_desc.value,
            "authors": c_auth.value,
            "files": [str(fpath.name)],
        })

    # dump collection to file
    with open(data_dir / "collection.json", "w") as fh:
        json.dump(items, fh, indent=4)

    # create ZIP (store-only, with integrity test: -0 -T)
    zip_path = data_dir.parent / f"{name}.zip"
    if not zip_path.exists() or overwrite:
        zip_path.unlink(missing_ok=True)
        with cwd(data_dir):
            args = [
                "zip",
                "-r",
                "-0",
                "-T",
                f"{zip_path.resolve()}",
                "collection.json",
            ] + [f.name for f in data_dir.iterdir()]
            subprocess.run(args)
def model_weight_download():
    """Ensure the model weight files exist locally, downloading any missing ones.

    Downloads the main model and the Inception-BN weights into
    ``MODEL_FOLDER`` unless ``is_model_downloaded()`` reports them present.
    """
    if not is_model_downloaded():
        # exist_ok removes the check-then-create race of the original
        # `if not os.path.exists(...): os.makedirs(...)` pattern
        os.makedirs(MODEL_FOLDER, exist_ok=True)
        gdown.download(MODEL_URL, MODEL_PATH, quiet=False)
        gdown.download(INCEPTION_BN_URL, INCEPTION_BN_PATH, quiet=False)
Example #20
0
import numpy as np

# Number of rows to read
# Set to None to read all rows
numRowsRead = 100

# URL of chess records file
zipfileUrl = 'https://drive.google.com/uc?export=download&confirm=BYO0&id=0Bw0y3jV73lx_aXE3RnhmeE5Rb1E'

# Name of zip file
zipName = 'all_with_filtered_anotations.zip'

# Downloads zip file.  750 MB!
# gdown is imported lazily so the dependency is only needed on first run.
if not exists(zipName):
    import gdown
    gdown.download(zipfileUrl, zipName, quiet=False)

# Read column names
# The header row sits after 4 preamble lines; read a single row to grab it.
columns = list(
    pd.read_csv(zipName, compression='zip', sep=' ', skiprows=4, nrows=1))
# Labels look like "N.name": keep only the part after the dot and drop the
# first/last artifact columns.
columns = list(map(lambda x: x.split('.')[1], columns[1:-1]))

# sets up function to clean up booleans in data
# Maps 'true'/'false' substrings to Python booleans, anything else to NaN.
booleanColumns = columns[6:]
booleanConvert = lambda x: True if 'true' in x else False if 'false' in x else np.nan

# Read metadata for each game
metadata = pd.read_csv(zipName,
                       compression='zip',
                       sep=' ',
                       skiprows=5,
Example #21
0
def download_and_extract(url, path, extract_dir):
    """Download a .tar.gz archive from *url* to *path* and extract it.

    Parameters
    ----------
    url : str
        Source URL passed to gdown (third positional arg is gdown's
        ``quiet`` flag, kept False for progress output).
    path : str
        Local path the archive is written to.
    extract_dir : str
        Directory the archive contents are extracted into.
    """
    gdown.download(url, path, False)
    # context manager guarantees the archive is closed even if
    # extraction raises (the original leaked the handle on error)
    with tarfile.open(path, 'r:gz') as tar:
        tar.extractall(path=extract_dir)
Example #22
0
def initialize_detector(detector_backend):
    """Initialize module-level face (and eye) detectors for a backend.

    Parameters
    ----------
    detector_backend : str
        One of 'opencv', 'ssd', 'dlib' or 'mtcnn'.

    Side effects
    ------------
    Sets globals ``face_detector``, ``eye_detector`` (opencv/ssd only)
    and ``sp`` (dlib only); downloads any missing model files into
    ``~/.deepface/weights/``.
    """
    global face_detector
    home = str(Path.home())

    # eye detector is common for opencv and ssd
    if detector_backend == 'opencv' or detector_backend == 'ssd':
        opencv_path = get_opencv_path()
        eye_detector_path = opencv_path + "haarcascade_eye.xml"

        if not os.path.isfile(eye_detector_path):
            raise ValueError("Confirm that opencv is installed on your environment! Expected path ", eye_detector_path, " violated.")

        global eye_detector
        eye_detector = cv2.CascadeClassifier(eye_detector_path)

    # ------------------------------
    # face detectors
    if detector_backend == 'opencv':
        opencv_path = get_opencv_path()
        face_detector_path = opencv_path + "haarcascade_frontalface_default.xml"

        if not os.path.isfile(face_detector_path):
            raise ValueError("Confirm that opencv is installed on your environment! Expected path ", face_detector_path, " violated.")

        face_detector = cv2.CascadeClassifier(face_detector_path)

    elif detector_backend == 'ssd':

        # check required ssd model exists in the home/.deepface/weights folder

        # model structure
        if not os.path.isfile(home + '/.deepface/weights/deploy.prototxt'):

            print("deploy.prototxt will be downloaded...")

            url = "https://github.com/opencv/opencv/raw/3.4.0/samples/dnn/face_detector/deploy.prototxt"

            output = home + '/.deepface/weights/deploy.prototxt'

            gdown.download(url, output, quiet=False)

        # pre-trained weights
        if not os.path.isfile(home + '/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel'):

            print("res10_300x300_ssd_iter_140000.caffemodel will be downloaded...")

            url = "https://github.com/opencv/opencv_3rdparty/raw/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel"

            output = home + '/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel'

            gdown.download(url, output, quiet=False)

        face_detector = cv2.dnn.readNetFromCaffe(
            home + "/.deepface/weights/deploy.prototxt",
            home + "/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel"
        )

    elif detector_backend == 'dlib':
        import dlib  # optional heavy dependency, imported lazily. version: 19.20.0

        global sp

        face_detector = dlib.get_frontal_face_detector()

        # check required file exists in the home/.deepface/weights folder
        if not os.path.isfile(home + '/.deepface/weights/shape_predictor_5_face_landmarks.dat'):

            print("shape_predictor_5_face_landmarks.dat.bz2 is going to be downloaded")

            url = "http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2"
            output = home + '/.deepface/weights/' + url.split("/")[-1]

            gdown.download(url, output, quiet=False)

            # decompress the .bz2 archive. The local variable no longer
            # shadows the `zipfile` module and both file handles are now
            # closed deterministically (the original leaked the output
            # file handle via open(...).write(...)).
            with bz2.BZ2File(output) as archive:
                data = archive.read()
            newfilepath = output[:-4]  # discard .bz2 extension
            with open(newfilepath, 'wb') as out_file:
                out_file.write(data)

        sp = dlib.shape_predictor(home + "/.deepface/weights/shape_predictor_5_face_landmarks.dat")

    elif detector_backend == 'mtcnn':
        face_detector = MTCNN()
def download_extract(gdrive_res):
  """Download the tar archive described by *gdrive_res* from Google Drive
  and extract its contents into the current working directory."""
  base_id = 'https://drive.google.com/uc?id='
  download_url = base_id + gdrive_res.gdriveid
  gdown.download(download_url, gdrive_res.tarname, quiet=False)
  with tarfile.open(gdrive_res.tarname) as archive:
    archive.extractall()
Example #24
0
def загрузить_предобученную_модель():
  """Download a pre-trained seq2seq chatbot (tokenizer + Keras model),
  rebuild separate encoder/decoder inference models from its layers,
  and run an interactive console chat loop until the user types 'Выход'.
  (Comments translated to English; code unchanged.)"""
  url = 'https://storage.googleapis.com/aiu_bucket/tokenizer_best.json'
  output = 'tokenizer_best.json' # name of the local file the download is saved to
  gdown.download(url, output, quiet=True) # fetch the file from the URL above

  with open('tokenizer_best.json') as f:
    data = json.load(f)
    токенайзер = tokenizer_from_json(data)
  # url = ''
  # output = 'model_chatbot_100epochs(rms)+50(ada).h5'
  # gdown.download(url, output, quiet=True)  
  url = 'https://storage.googleapis.com/aiu_bucket/model_chatbot_100epochs(rms)%2B50(ada).h5'
  output = 'model_chatbot_100epochs(rms)%2B50(ada).h5' # name of the local file the download is saved to
  gdown.download(url, output, quiet=True) # fetch the model weights from the URL above
  model = load_model('model_chatbot_100epochs(rms)%2B50(ada).h5')


  def strToTokens(sentence: str): # takes a question sentence (string) as input
    words = sentence.lower().split() # lower-case the sentence and split it into words
    tokensList = list() # accumulator for the token/index sequence
    for word in words: # for each word in the sentence
      try:
        tokensList.append(токенайзер.word_index[word]) # look up the word's index via the tokenizer and append it
      except KeyError:
        pass # out-of-vocabulary words are silently dropped
    # Return the question as an index sequence, padded/limited to the length of the longest question in the question base
    return pad_sequences([tokensList], maxlen=13, padding='post')
  ######################
  # Wire the working inference models' layers to the pre-trained model's layers
  ######################
  def loadInferenceModels():
    encoderInputs = model.input[0]   # the encoder input is the pre-trained model's first input (input_1)
    encoderEmbedding = model.layers[2] # share the embedding layer (model.layers[2] is embedding_1)
    encoderOutputs, state_h_enc, state_c_enc = model.layers[4].output # take the first LSTM layer's outputs for the encoder (lstm_1)
    encoderStates = [state_h_enc, state_c_enc] # the captured states become the encoder states
    encoderModel = Model(encoderInputs, encoderStates) # build the encoder model

    decoderInputs = model.input[1]   # the decoder input is the pre-trained model's second input (input_2)
    decoderStateInput_h = Input(shape=(200 ,)) # declare the shape of the input layer carrying state_h
    decoderStateInput_c = Input(shape=(200 ,)) # declare the shape of the input layer carrying state_c

    decoderStatesInputs = [decoderStateInput_h, decoderStateInput_c] # collect both state inputs into decoderStatesInputs

    decoderEmbedding = model.layers[3] # share the embedding layer (model.layers[3] is embedding_2)
    decoderLSTM = model.layers[5] # share the LSTM layer (model.layers[5] is lstm_2)
    decoderOutputs, state_h, state_c = decoderLSTM(decoderEmbedding.output, initial_state=decoderStatesInputs)
    decoderStates = [state_h, state_c] # the LSTM yields fresh states

    decoderDense = model.layers[6] # share the dense layer (model.layers[6] is dense_1)
    decoderOutputs = decoderDense(decoderOutputs) # pass the LSTM output through the dense softmax layer

      # Define the decoder model: its inputs are the decoded-so-far answers (decoderForInputs) plus states,
      # its outputs are the predicted answer token and the updated states
    decoderModel = Model([decoderInputs] + decoderStatesInputs, [decoderOutputs] + decoderStates)
    return encoderModel , decoderModel

  ######################
  # Final setup: run the chat loop on top of the pre-trained model
  ######################

  encModel , decModel = loadInferenceModels() # build the encoder and decoder models

  display.clear_output(wait=True)
  print()
  print('Тест общения с ботом. (Для завершения наберите «Выход»)')
  while(True):
    # Get the state values the encoder produces for the given question
    ques = input( 'Задайте вопрос : ' )
    if ques=='Выход':
      break
    statesValues = encModel.predict(strToTokens(ques))
    # Create an empty array of shape (1, 1)
    emptyTargetSeq = np.zeros((1, 1))
    emptyTargetSeq[0, 0] = токенайзер.word_index['start'] # seed the empty sequence with the index of the start word 'start'

    stopCondition = False # flag that, once set, stops generating further words
    decodedTranslation = '' # the generated answer is accumulated here
    while not stopCondition : # until the stop condition fires
      # Feed the decoder the seed sequence ('start') and the states predicted by the encoder for the question.
      # The decoder replaces 'start' with the generated word and updates the states
      decOutputs , h , c = decModel.predict([emptyTargetSeq] + statesValues)

      # argmax scans the decOutputs vector [0,0,15104], finds the maximum value and returns its index
      sampledWordIndex = np.argmax( decOutputs[0, 0, :]) # take argmax over the 15104-element vocabulary axis: the predicted word's index
      sampledWord = None # will hold the predicted word mapped back to natural language
      for word , index in токенайзер.word_index.items():
        if sampledWordIndex == index: # if the chosen index matches some index from the vocabulary
          decodedTranslation += ' {}'.format(word) # the word under that index is appended to the final answer
          sampledWord = word # remember the chosen word in sampledWord

      # If the chosen word is 'end', or the generated answer exceeds the maximum answer length
      if sampledWord == 'end' or len(decodedTranslation.split()) > 13:
        stopCondition = True # the stop condition fires and generation stops

      emptyTargetSeq = np.zeros((1, 1)) # create a fresh empty array
      emptyTargetSeq[0, 0] = sampledWordIndex # seed it with the chosen word's index
      statesValues = [h, c] # and the states updated by the decoder
      # then continue the loop with the updated parameters

    # NOTE(review): [:-3] presumably strips the trailing ' end' token -- confirm
    print(decodedTranslation[:-3]) # print the answer generated by the decoder
Example #25
0
dataname = 'PointPattern'
num_graph = 15000

# Select the dataset archive matching the packing fraction `phi`.
# elif/else replaces three independent `if`s: previously an unsupported
# phi left `url`/`output` unassigned and crashed later with a NameError.
if phi == 0.3:
    ld_dir = 'hpr_phi03' + '_' + str(num_graph) + '/'
    url = 'https://drive.google.com/uc?id=1C3ciJsteqsKFVGF8JI8-KnXhe4zY41Ss'
    output = 'hpr_phi03' + '_' + str(num_graph) + '.zip'
elif phi == 0.4:
    ld_dir = 'hpr_phi04' + '_' + str(num_graph) + '/'
    url = 'https://drive.google.com/uc?id=1rsTh09FzGxHculBVrYyl5tOHD9mxqc0G'
    output = 'hpr_phi04' + '_' + str(num_graph) + '.zip'
elif phi == 0.35:
    ld_dir = 'hpr_phi035' + '_' + str(num_graph) + '/'
    url = 'https://drive.google.com/uc?id=16pI974P8WzanBUPrMHIaGfeSLoksviBk'
    output = 'hpr_phi035' + '_' + str(num_graph) + '.zip'
else:
    raise ValueError('Unsupported phi: {}'.format(phi))

# Download the archive once, then unpack it into the working directory.
if not os.path.exists(output):
    gdown.download(url, output, quiet=False)
with zipfile.ZipFile(output, 'r') as zip_ref:
    zip_ref.extractall()
#os.remove(output)
# load edge_index
ld_edge_index = ld_dir + 'graph' + str(num_graph) + '_edge_index' + '.mat'
edge_index = sio.loadmat(ld_edge_index)
edge_index = edge_index['edge_index'][0]
# load feature
ld_feature = ld_dir + 'graph' + str(num_graph) + '_feature' + '.mat'
feature = sio.loadmat(ld_feature)
feature = feature['feature'][0]
# load label
ld_label = ld_dir + 'graph' + str(num_graph) + '_label' + '.mat'
label = sio.loadmat(ld_label)
label = label['label']
Example #26
0
def log2(x):
    """Return the base-2 logarithm of *x*, truncated to an int."""
    exponent = np.log2(x)
    return int(exponent)


# we use different batch size for different resolution, so larger image size
# could fit into GPU memory. The keys is image resolution in log2
batch_sizes = {2: 16, 3: 16, 4: 16, 5: 16, 6: 16, 7: 8, 8: 4, 9: 2, 10: 1}
# We adjust the train step accordingly
train_step_ratio = {k: batch_sizes[2] / v for k, v in batch_sizes.items()}

# exist_ok makes re-runs idempotent (bare makedirs raised FileExistsError
# the second time the script was executed)
os.makedirs("celeba_gan", exist_ok=True)

url = "https://drive.google.com/uc?id=1O7m1010EJjLE5QxLZiM9Fpjs7Oj6e684"
output = "celeba_gan/data.zip"
gdown.download(url, output, quiet=True)

with ZipFile("celeba_gan/data.zip", "r") as zipobj:
    zipobj.extractall("celeba_gan")

# Create a dataset from our folder, and rescale the images to the [0-1] range:

ds_train = keras.preprocessing.image_dataset_from_directory("celeba_gan",
                                                            label_mode=None,
                                                            image_size=(64,
                                                                        64),
                                                            batch_size=32)
ds_train = ds_train.map(lambda x: x / 255.0)


def resize_image(res, image):
Example #27
0
import gdown

# Google-Drive "share" link; the file id is the 6th path segment
# (https: / '' / drive.google.com / file / d / <id> / view...).
url = 'https://drive.google.com/file/d/1dPjH-QAHACJ747QCDOLf3iawNKyHCN1E/view?usp=sharing'
file_id = url.split('/')[5]
# Rebuild the direct-download ('uc?id=') form that gdown understands.
url = f'https://drive.google.com/uc?id={file_id}'
gdown.download(url, quiet=False)
Example #28
0
# parser.add_argument('--interactive', nargs='?',  type=bool, default=False, help='Set True to enter custom input')

# args = parser.parse_args()

# Resolve working directories relative to the current directory.
home_dir = "./"
data_dir = os.path.join(home_dir, "datasets")
model_dir = os.path.join(home_dir, "model_dir")
if not os.path.isdir(model_dir):
    os.mkdir(model_dir)

# Reuse the tokenizer from the project's `identification` module --
# presumably so inputs match the checkpoint's vocabulary; verify there.
tokenizer = identification.tokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_path = os.path.join(model_dir, 'model_370_44_bioe.pt')
# Fetch the pretrained checkpoint from Google Drive on first run only.
if not os.path.exists(model_path):
    url = 'https://drive.google.com/uc?id=1-5oN2lS37IcXT1Lhd-H3TxlEdi4MzVPC'
    gdown.download(url, model_path)

# map_location remaps CUDA-saved tensors onto the CPU so the checkpoint
# also loads on machines without a GPU.
model = torch.load(model_path, map_location={'cuda:0': 'cpu'})


def get_dev_outputs(article_dir="dev-articles"):
    """Run span identification over the articles in *article_dir*.

    Args:
        article_dir: directory containing the articles to score
            (defaults to the dev split).
    """
    # Bug fix: the parameter was previously ignored in favour of the
    # hard-coded 'dev-articles' literal.
    test_articles, test_article_ids = identification.read_articles(
        article_dir)
    # One independent (empty) span list per article; ``[[]] * n`` would
    # alias a single list object across every slot.
    test_spans = [[] for _ in test_articles]
    test_dataloader, test_sentences, test_bert_examples = identification.get_data(
        test_articles, test_spans, indices=np.arange(len(test_articles)))
    # NOTE(review): `sps` is computed but never returned -- confirm against
    # the original source whether a return statement was truncated here.
    sps = identification.get_score(model,
                                   dataloader=test_dataloader,
                                   sentences=test_sentences,
                                   bert_examples=test_bert_examples,
                                   mode="test")
Example #29
0
    'benchmark':
    'https://drive.google.com/open?id=10boLBiYq-6wKC_N_71unlMyNrimRjpVa',
}


def parse_args():
    """Parse the command-line options for the dataset downloader."""
    cli = argparse.ArgumentParser(description='Download Dataset')
    cli.add_argument('--data', default='part1', choices=data2url.keys())
    cli.add_argument('--tar_path', default='data.tar.gz', type=str)
    cli.add_argument('--force', action='store_true')
    return cli.parse_args()


if __name__ == '__main__':
    args = parse_args()
    url = data2url[args.data]
    tar_path = args.tar_path
    # Skip the download when the archive already exists, unless --force.
    if osp.exists(tar_path) and not args.force:
        print('{} already exists. Run with --force to overwrite.'.format(
            tar_path))
    else:
        gdown.download(url, tar_path, quiet=False)
    print('untar {}'.format(tar_path))
    # Context manager guarantees the archive handle is closed even when an
    # extraction error is raised part-way through.
    with tarfile.open(tar_path) as tar:
        for member in tar.getmembers():
            # Extract regular files only (skips links/devices).
            # NOTE(review): member names are not sanitised -- do not run
            # this on untrusted archives (path-traversal risk).
            if member.isreg():
                tar.extract(member)
    print('download data successfully!')
def setup_dataset():
    """Setup the Train and Validation datasets.

    Downloads the single-label abstract CSV on first use, tokenizes every
    training abstract with SciBERT and packs the encodings into
    TensorDatasets.

    Returns:
        (tuple): tuple containing:

            - **train_dataset** (torch.utils.data.TensorDataset): 90% split
            - **val_dataset** (torch.utils.data.TensorDataset): 10% split
            - **tokenizer**: the SciBERT tokenizer used for encoding
    """
    filename = 'SINGLELABEL_Colleen_and_Alex_training_data_4.19.csv'
    if not os.path.exists(filename):
        # gdown needs the direct-download 'uc?id=' form of the link; the
        # previous code passed the '/file/d/.../view' share URL, fetched the
        # file twice (download + cached_download) and then tried to unpack a
        # plain CSV with gdown.extractall. One md5-verified fetch is enough.
        url = 'https://drive.google.com/uc?id=1eOLNOl6ZMz4UxQ7qbSI-bJSSNkmNZjr9'
        md5 = 'fa837a88f0c40c513d975104edf3da17'
        gdown.cached_download(url, filename, md5=md5)

    df = pd.read_csv(filename)
    df = df[[
        'title', 'abstract', 'labels', 'doi', 'url', 'single_labels',
        'labels_string'
    ]]
    # Keep only rows that actually carry a single-label annotation.
    df = df[df['single_labels'].notnull()]

    labels = []
    docs = []
    labels_test = []
    docs_test = []
    # Canonical label order: a label's index in this list is its class id.
    labels_dict = [
        "'Protect from harm'", "'Process resources'",
        "'Sense send or process information'",
        "'Maintain structural integrity'", "'Move'", "'Attach'",
        "'Maintain ecological community'",
        "'Chemically modify or Change energy state'", "'Change size or color'",
        "'Physically assemble/disassemble'"
    ]

    single_labels = df["single_labels"].tolist()
    abstract = df["abstract"].tolist()
    title = df["title"].tolist()
    # Hold out the last 40 documents as a test set; the rest go to training.
    for i in range(len(title)):
        if i < len(title) - 40:
            docs.append(abstract[i])
            labels.append(labels_dict.index(single_labels[i]))
        else:
            docs_test.append(abstract[i])
            labels_test.append(labels_dict.index(single_labels[i]))

    print("Number of training labels: {:}".format(len(labels)))
    print("Number of training docs: {:}".format(len(docs)))
    print("Number of test labels: {:}".format(len(labels_test)))
    print("Number of test docs: {:}".format(len(docs_test)))

    tokenizer = AutoTokenizer.from_pretrained(
        "allenai/scibert_scivocab_uncased")
    print('SciBERT tokenizer loaded')

    # Show one worked example of the tokenization pipeline.
    # original abstract
    print(' Original: ', docs[5])
    # abstract split into tokens
    print('Tokenized: ', tokenizer.tokenize(docs[5]))
    # abstract as mapped to ids
    print('Token IDs: ',
          tokenizer.convert_tokens_to_ids(tokenizer.tokenize(docs[5])))

    # Finish tokenizing all docs and map tokens to their word IDs.
    input_ids = []
    attention_masks = []

    for d in docs:
        encoded_dict = tokenizer.encode_plus(
            d,
            truncation=True,
            add_special_tokens=True,
            max_length=256,
            # padding='max_length' replaces the deprecated
            # pad_to_max_length=True flag; same behavior.
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        )
        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])

    # Convert the lists into tensors.
    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    labels = torch.tensor(labels)

    print('Original: ', docs[5])
    print('Token IDs:', input_ids[5])
    print('Reverse:', tokenizer.convert_ids_to_tokens(input_ids[5]))

    # Split up training & testing/validation (random 90/10 split).
    dataset = TensorDataset(input_ids, attention_masks, labels)

    # Number of docs to include per set
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
    return train_dataset, val_dataset, tokenizer
def train(epochs, lr, bs, savemodel, csv, imgpath):
    """Train FCN-MobileNet segmentation models at 8s/16s/32s strides.

    Args:
        epochs: number of training epochs per architecture.
        lr: learning rate for the RMSprop optimizer.
        bs: batch size for the data generator.
        savemodel: when truthy, save each trained model under ./output/models.
        csv: path to a CSV with 'image' and 'mask' columns.
        imgpath: root directory holding 'images/' and 'masks/' subfolders.
    """
    np.random.seed(0)
    train_set_full = pd.read_csv(csv)
    list_ids = list(train_set_full["image"].values)
    list_masks = list(train_set_full["mask"].values)
    # get root directories
    base_img_path = imgpath
    img_path = os.path.join(base_img_path, "images")
    mask_path = os.path.join(base_img_path, "masks")

    # Map every image id to its mask file.
    labels = dict(zip(list_ids, list_masks))

    out_models = os.path.join(".", "output", "models")
    out_history = os.path.join(".", "output", "history")
    if savemodel and not os.path.exists(out_models):
        os.makedirs(out_models)
    if not os.path.exists(out_history):
        os.makedirs(out_history)

    # Fetch pretrained classifier weights once, verified against their md5
    # checksum. (The previous code downloaded the file twice and then tried
    # to run gdown.extractall on a plain .h5 file, which is not an archive.)
    if not os.path.exists(os.path.join("mn_classification_weights.h5")):
        url = "https://drive.google.com/uc?id=1Kzy257D9HgV9MQHEk1SCbBBvAR477stH"
        output = "mn_classification_weights.h5"
        gdown.cached_download(url, output, md5=WEIGHTS_CHECKSUM)

    for arch in [8, 16, 32]:
        print(f"Starting FCN-MN {arch}s training")
        # elif chain: the original used two independent `if` statements, so
        # the trailing `else` overwrote the 8s model with a 32s one.
        if arch == 8:
            model = mobilenet_8s(train_encoder=True,
                                 final_layer_activation="sigmoid",
                                 prep=True)
        elif arch == 16:
            model = mobilenet_16s(train_encoder=True,
                                  final_layer_activation="sigmoid",
                                  prep=True)
        else:
            model = mobilenet_32s(train_encoder=True,
                                  final_layer_activation="sigmoid",
                                  prep=True)

        train_generator = DataGeneratorMobileNetKeras(
            batch_size=bs,
            img_path=img_path,
            labels=labels,
            list_IDs=list_ids,
            n_channels=3,
            n_channels_label=1,
            shuffle=True,
            mask_path=mask_path,
        )

        # Pass the caller's learning rate through; it was silently ignored
        # before (RMSprop() always used its default).
        model.compile(loss="binary_crossentropy",
                      optimizer=RMSprop(learning_rate=lr),
                      metrics=["accuracy"])

        train_history = model.fit_generator(generator=train_generator,
                                            use_multiprocessing=True,
                                            epochs=epochs)

        timestamp = str(datetime.now().strftime("%Y%m%d_%H-%M-%S"))

        model_name = "{}-{}s_fcn_mn".format(timestamp, arch)

        if savemodel:
            # model.save expects a single filepath; the original passed the
            # directory and the filename as two positional arguments.
            model.save(os.path.join(out_models, model_name + ".h5"))

        history_csv = pd.DataFrame(train_history.history)
        history_csv.to_csv(os.path.join(out_history, model_name + ".csv"))
Example #32
0
    # Margin hyperparameter read from the run configuration -- presumably
    # for a metric/triplet loss; confirm against the caller.
    margin = input_dict['margin']

    # Initializes the embedding model
    emb_model = emb_init(img_size, emb_size)

    # Load pretrained weights to emb_model
    try:
        emb_model.load_weights(input_dict['weights_path'])
    # NOTE(review): bare ``except`` swallows every failure mode (bad path,
    # corrupt file, shape mismatch) -- consider ``except Exception``.
    except:
        if 'true' in input_dict['download_weights'].lower():

            from zipfile import ZipFile
            import gdown
            print("Downloading weights......")
            # Fetch the pretrained-weights archive from Google Drive and
            # unpack it under ./weights/.
            gdown.download(
                'https://drive.google.com/uc?export=download&confirm=tOfl&id=1NYd6cQlewoQiFH71BHeOy2eTsZEvGzLg',
                output='./weights/pretrained.zip',
                quiet=False)

            with ZipFile("./weights/pretrained.zip", 'r') as zip:
                zip.extractall(path='./weights/')

            emb_model.load_weights('./weights/pretrained/ptm_weights')
            print('pretrained weights loaded successfully !!\n')

        elif 'false' in input_dict['download_weights'].lower():

            # Weights are expected to already be unzipped on disk.
            emb_model.load_weights('./weights/pretrained/ptm_weights')
            print('pretrained weights loaded successfully !!\n')

        else:
            print("pretrained weights not found !")