def create_training_dataloaders(data: pd.DataFrame, splitting_params_fn: str,
                                batching_params_fn: str):
    """
    Creates train/val/test dataloaders for PyTorch model training and evaluation.
    :param data: dataframe with generated features
    :param splitting_params_fn: path to a JSON file with kwargs for the splitting function
    :param batching_params_fn: path to a JSON file with kwargs for Prototypical Network batching
    :return: dict of dataloaders (and the label encoder, if target encoding is enabled)
    """
    if not Path(splitting_params_fn).exists():
        logger.error(
            f"File with settings for splitting data was not found at the provided path: {splitting_params_fn}"
        )
        raise FileNotFoundError(
            f"File with settings for splitting data was not found at the provided path: {splitting_params_fn}"
        )

    if not Path(batching_params_fn).exists():
        logger.error(
            f"File with settings for batching data was not found at the provided path: {batching_params_fn}"
        )
        raise FileNotFoundError(
            f"File with settings for batching data was not found at the provided path: {batching_params_fn}"
        )

    splitting_params = dict(read_json(splitting_params_fn)).get(
        "splitting_params", {})
    logger.debug(f"Splitting parameters: {splitting_params}")

    batching_params = dict(read_json(batching_params_fn)).get(
        "batching_options", {})
    logger.debug(f"Batching parameters: {batching_params}")

    if splitting_params.get('encode_target', False):
        splits, encoder = create_splits(data, **splitting_params)
    else:
        splits = create_splits(data, **splitting_params)

    dataloaders = defaultdict()
    for ds_type, splitted_data in splits.items():
        dataloaders[ds_type] = init_dataloader(
            *splitted_data,
            mode=ds_type,
            classes_per_it=batching_params.get("classes_per_it_train"),
            iterations=batching_params.get("iterations"),
            num_query=batching_params.get("num_query_train"),
            num_support=batching_params.get("num_support_train"))
        logger.info(f"Data loader of type: {ds_type} created.")
    del splits
    _ = gc.collect()

    if splitting_params.get('encode_target', False):
        return dataloaders, encoder
    else:
        return dataloaders
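A minimal usage sketch for the function above; the JSON paths and the feature dataframe are hypothetical placeholders (a fuller, config-driven call appears in the test_create_dataloaders example further down):

# Hypothetical paths and data; in the project both JSON files come from the config.
features_df = ...  # pd.DataFrame with generated features

loaders = create_training_dataloaders(
    features_df,
    splitting_params_fn="configs/processing_params.json",
    batching_params_fn="configs/model_params.json")

train_loader = loaders["train"]
val_loader = loaders["validation"]
# If "encode_target" is set in the splitting params, the call returns (loaders, encoder) instead.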
Example 2
    def test_normalize_dataset(self):
        data = pd.DataFrame({
            'data': [
                list(np.random.randint(1, 21, size=10))  # integers in [1, 20]
                for _ in range(50)
            ]
        })

        # Case #1
        location = dict(
            read_json(config.get(
                'GazeVerification',
                'verification_params'))).get('pretrained_model_location')
        location = self._current_base_path / location

        norm_data = normalize_gaze(data,
                                   to_restore=False,
                                   to_save=True,
                                   checkpoint_dir=location)
        self.assertListEqual(["data", "data_scaled"], list(norm_data.columns))

        # Case #2
        norm_data = normalize_gaze(data,
                                   to_restore=True,
                                   to_save=False,
                                   checkpoint_dir=location)
        self.assertListEqual(["data", "data_scaled"], list(norm_data.columns))
Example 3
def db_connection():
    """Function for connecting, creating and

    Returns
    -------

    """
    db_credentials = read_json('data/sql-connection.json')

    conn = pyodbc.connect(
        "Driver={};Server={};Database={};uid={};pwd={};".format(
            db_credentials.get('driver'),
            db_credentials.get('server'),
            db_credentials.get('database'),
            db_credentials.get('user'),
            db_credentials.get('password')
        )
    )

    conn.cursor().execute("""
        CREATE TABLE RV.exp_acoes (
            Cliente INTEGER PRIMARY KEY,
            Financeiro FLOAT NOT NULL
        );
        
        INSERT INTO RV.exp_acoes (Cliente, Financeiro) VALUES (1, 96000);
        INSERT INTO RV.exp_acoes (Cliente, Financeiro) VALUES (2, 250000);
        INSERT INTO RV.exp_acoes (Cliente, Financeiro) VALUES (3, 20500);
    """)

    conn.commit()
    conn.close()
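The connection string above is built from five keys read out of data/sql-connection.json. A sketch of how that file could be generated is shown below; all values are placeholders, only the key names are taken from the code above.

import json
import os

# Placeholder credentials; only the key names (driver, server, database, user, password)
# are what db_connection() actually reads.
credentials = {
    "driver": "ODBC Driver 17 for SQL Server",
    "server": "localhost",
    "database": "mydb",
    "user": "sa",
    "password": "change-me",
}

os.makedirs("data", exist_ok=True)
with open("data/sql-connection.json", "w") as f:
    json.dump(credentials, f, indent=4)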
 def __init_prototypical_model(self, embedding_model: nn.Module,
                               to_load: bool):
     """
      Creates the Prototypical model from a given Embedding base model, or loads it from file.
     :param embedding_model: pre-trained model;
     :param to_load: whether to load pre-trained weights from file;
     """
     self._model_parameters = dict(
         read_json(config.get("GazeVerification", "model_params")))
     if to_load:
         fname = os.path.join(
             sys.path[0],
             self._eval_parameters.get("pretrained_model_location", "."),
             self._eval_parameters.get("model_name", "model.pt"))
         if not os.path.isfile(fname):
             logger.error(
                 f"No pretrained model file found in given path: {fname}.\n",
                 f"Check path and retry.")
             return
         else:
             logger.info(f"Loading model from: {fname}")
             embedding_model = init_model(
                 EmbeddingNet,
                 parameters=self._model_parameters,
                 dir=self._eval_parameters.get("pretrained_model_location",
                                               "."),
                 filename=self._eval_parameters.get("model_name",
                                                    "model.pt"))
     self._protypical_model = PrototypeNet(embedding_model).eval()
     logger.info(f"Prototypical model created.")
def main():
    config = read_json(configfile)
    if config is not None:

        args = parser()

        trackers = ','.join(config['trackers'])
        webseeds = ','.join(config['webseeds'])

        output = args.output

        if output is None:
            if '/' in args.input:
                t = args.input.split('/')
                output = t[-2] if t[-1] == '' else t[-1]  # handle trailing slash
            else:
                output = '%s.torrent' %(args.input)

        if not output.endswith('.torrent'):
            output += '.torrent'

        print('\ncreating torrent for %s:' %(args.input))
        p = Popen([config['mktorrent'], '-a', trackers, '-w', webseeds, '-o', output, '-c', '%s' %(config['comment']), args.input])
        p.communicate()
        print('done')
        if args.magnet:
            print('magnet link: %s' %(get_magnet(output)))

    else:
        print('Error: Could not read from config file %s. Does it exist? Is the json valid?' %(configfile))
Example 6
 def authenticate():
     """This function logs in some reddit´s account.
     Returns
     -------
     """
     return praw.Reddit(
         **helpers.read_json('settings/reddit_settings.json'))
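A sketch of what settings/reddit_settings.json is assumed to contain: since the file is unpacked straight into praw.Reddit, its keys should be valid praw.Reddit keyword arguments. The exact key set used by this project is not shown, so the keys below are assumptions and the values are placeholders.

import json
import os

# Assumed keys -- standard praw.Reddit keyword arguments -- with placeholder values.
reddit_settings = {
    "client_id": "YOUR_CLIENT_ID",
    "client_secret": "YOUR_CLIENT_SECRET",
    "username": "YOUR_USERNAME",
    "password": "YOUR_PASSWORD",
    "user_agent": "my-reddit-bot/0.1",
}

os.makedirs("settings", exist_ok=True)
with open("settings/reddit_settings.json", "w") as f:
    json.dump(reddit_settings, f, indent=4)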
Example 7
 def __init__(self):
     self.driver = webdriver.Chrome(
         r'D:\PythonProjects\Personal\python-sandbox\drivers'
         r'\chromedriver.exe'
     )
     self.credentials = helpers.read_json('credentials.json')
     self.url = 'https://banco_abc.nexusweb.com.br/'
     self.alert = None
Example 8
 def __init__(self):
     self.driver = webdriver.Chrome(
         r'E:\PythonProjects\Personal\friday\data\webdriver'
         r'\chromedriver.exe')
     self.credentials = \
         helpers.read_json('settings/punch_a_clock_settings.json')
     self.url = 'https://banco_abc.nexusweb.com.br/'
     self.alert = None
Example 9
    def __init__(self, user='******', limit=1, verbose=True):
        self.credentials = helpers.read_json('settings/ps_settings.json')

        self.api = tweepy.API(self.authenticate())

        self.user = user
        self.limit = limit

        self.verbose = verbose
 def __init__(self,
              embedding_model: nn.Module = None,
              to_load: bool = True):
     self._eval_parameters = dict(
         read_json(config.get("GazeVerification", "verification_params")))
     self._device = torch.device(
         self._eval_parameters.get("training_options",
                                   {}).get("device", "cpu"))
     self.__init_prototypical_model(embedding_model, to_load)
     self.__init_parameters()
     self.__modes = ['identification', 'verification', 'embeddings', 'run']
     seed_everything(seed_value=11)
Example 12
    def test_create_dataloaders(self):

        splitting_params = dict(
            read_json(config.get('Preprocessing', 'processing_params'))).get(
                "splitting_params", {})
        logger.debug(f"Splitting parameters: {splitting_params}")

        batching_params = dict(
            read_json(config.get('GazeVerification',
                                 'model_params'))).get("batching_options", {})
        logger.debug(f"Batching parameters: {batching_params}")

        n_rows = 600
        data_col = splitting_params.get("data_col")  # 'data_scaled'
        target_col = splitting_params.get("target_col")  # 'user_id'
        session_col = splitting_params.get(
            "session_id_col")  # 'unique_session_id'

        data = pd.DataFrame({
            data_col: [
                list(np.random.uniform(0, 1, size=(120, )))
                for _ in range(n_rows)
            ],
            target_col: [
                sss for ss in [[s] * 60 for s in np.arange(0, n_rows // 60)]
                for sss in ss
            ],
            session_col: [
                sss for ss in [[s] * 10 for s in np.arange(0, n_rows // 10)]
                for sss in ss
            ]
        })
        dataloaders = create_training_dataloaders(
            data,
            splitting_params_fn=config.get('Preprocessing',
                                           'processing_params'),
            batching_params_fn=config.get('GazeVerification', 'model_params'))
        self.assertEqual(2, len(dataloaders))
        self.assertIn("train", list(dataloaders.keys()))
        self.assertIn("validation", list(dataloaders.keys()))
Example 13
def award_by_hash(identifier=None):
    award = None
    if identifier + '.json' in os.listdir(config.JSONS_PATH):
        id = identifier
    else:
        hashmap_content = helpers.read_json(config.HASHMAP_PATH)
        id = hashmap_content.get(identifier, None)
    if id:
        award, verification_info = helpers.get_id_info(id)
    if award:
        return render_template(
            'award.html',
            award=award,
            verification_info=urllib.parse.urlencode(verification_info))
    return "Sorry, this page does not exist."
Example 14
    def __init__(self, numOfKeywords, pathData, dataset_name, normalization):
        super().__init__()
        self.__lan = getlanguage(pathData + "/Datasets/" + dataset_name)
        self.__numOfKeywords = numOfKeywords
        self.__dataset_name = dataset_name
        self.__normalization = normalization
        self.__pathData = pathData
        self.__pathToDFFile = self.__pathData + "/Models/Unsupervised/dfs/" + self.__dataset_name + '_dfs.gz'
        self.__pathToDatasetName = self.__pathData + "/Datasets/" + self.__dataset_name
        self.__keywordsPath = f"{self.__pathData}/Keywords/{self.__class__.__name__}/{self.__dataset_name}"
        self.__outputPath = self.__pathData + "/conversor/output/"
        self.__algorithmName = f"{self.__class__.__name__}"

        self.model = init_keyword_extractor(
            read_json('evaluation/config/embedrank_bert_as_a_service.json'))
Example 15
    def __init__(self, **kwargs):
        """
        Class for training selected model with provided parameters.
        It keeps saving training statistics, checkpoints (is selected such option)
        and final model weights in to new directory named in unique way.
        kwargs: {'model', 'loss'}
        """
        self.__is_fitted = False

        parameters_fn = config.get("GazeVerification", "model_params")
        if not Path(parameters_fn).exists():
            logger.error(
                f"File with training model parameters was not found by the provided path: {parameters_fn}"
            )
            raise FileNotFoundError(
                f"File with training model parameters was not found by the provided path."
            )

        logger.info(f"Loading training model parameters from {parameters_fn}")
        self.__general_parameters = dict(read_json(parameters_fn))

        # The preferred way to provide the experiment name is from an upper-level entity (the Stand/Experimenter class);
        # in that case all folders are already created (for training stats/checkpoints/output visualizations).
        if kwargs.get("experiment_name", None) is not None:
            self.__experiment_name = kwargs.get("experiment_name")
        else:
            # But in some cases it can be set manually here
            self.__experiment_name = self.__general_parameters.get(
                "experiment_name",
                f"exp_{datetime.now().strftime('%Y.%m.%d-%H:%M:%S')}")
        logger.info(f"Experiment {self.__experiment_name} setup started...")
        self.__models_parameters = self.__general_parameters.get(
            "model_params", {})
        self.__batching_parameters = self.__general_parameters.get(
            "batching_options", {})
        self.__training_parameters = self.__general_parameters.get(
            "training_options", {})
        logger.info(
            f"Training general parameters: {self.__general_parameters}")
        self.__create_folders()

        # Set device type
        self.__acquire_device()
        self.__init_train_options(**kwargs)
        seed_everything(seed_value=11)
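For reference, a sketch of the top-level structure the "model_params" JSON file appears to have, judging from the keys read in this section (experiment_name, model_params, batching_options, training_options, and the batching keys used in create_training_dataloaders); all values are placeholders.

# Placeholder values; only the key names are taken from the snippets in this section.
model_params_example = {
    "experiment_name": "exp_2021.01.01-00:00:00",
    "model_params": {},        # model architecture settings (exact keys not shown here)
    "batching_options": {
        "classes_per_it_train": 5,
        "iterations": 100,
        "num_query_train": 5,
        "num_support_train": 5,
    },
    "training_options": {},    # e.g. device/optimizer settings (exact keys not shown here)
}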
Example 16
    def _load_data(self):
        # open file dialog
        filepath = Tk.filedialog.askopenfilename(
            title="Select file",
            filetypes=(("json files", "*.json"), ("all files", "*.*")))
        print(filepath)
        if filepath:  # askopenfilename returns '' if the dialog is cancelled
            self.fig.clear()  # clear everything that was drawn before

            data = read_json(filepath)
            self.delaunay_triangulation = DelaunayTriangulation(**data)

            # display input in the text field
            self._print_info(data)

            # plot the domain
            plot.plot(plt, self.delaunay_triangulation.triangulation_params)
            self.canvas.draw()  # redraw the canvas
Example 17
def test(path):
    # path = r'C:\Users\dyrs-ai-win10\Desktop\correct_wall\result'
    save__path = r'C:\Users\dyrs-ai-win10\Desktop\correct_wall\compare'
    # name = r'\lianjia_chaoyang_daxiyangxinchengCqu_006.json'
    all_list = os.listdir(path)
    file_list = [x for x in all_list if x[-4:] == 'json']
    for fi in file_list:
        print(fi)
        f_n = path + '/' + fi

        # f_s0 = save__path + '/' + fi[:-4] + r'0.png'
        # helpers.read_json1(f_n,f_s0)

        walls = helpers.read_json(f_n)
        f_s0 = save__path + '/' + fi[:-4] + r'0.png'
        helpers.draw_cle_test(walls, f_s0)
        an = run(walls)
        f_s1 = save__path + '/' + fi[:-4] + r'1.png'
        helpers.draw_cle_test(an, f_s1)
Example 18
    def __init__(self, path, subreddit, name=None, playboy_on_reddit=False):
        self.credentials = read_json('settings.json')
        self.logging_('%(levelname)s: %(asctime)s - %(message)s')
        self.path = path

        self.name = name
        self.subreddit = subreddit

        if playboy_on_reddit:
            self.subreddit = 'PlayboyOnReddit'
            self.credentials.get('reddit')['username'] = '******'
            self.credentials.get('reddit')['client_id'] = 'H5SoyNl14zTjVQ'
            self.credentials.get('reddit')['client_secret'] = \
                '0RJmTHumxR6AyM0FdFjLwi9Jzpo'

        self.reddit = self.reddit_authenticate()
        self.imgur = self.imgur_authenticate()

        self.reddit.validate_on_submit = True
Example 19
def geocoding(location):
    """This function geocodes a string location.

    Parameters
    ----------
    location : str
        String location.

    Returns
    -------
    geocoded_location : str
        Lat, Long coordinates.

    """
    response = googlemaps.Client(
        helpers.read_json('settings/maps_settings.json').get(
            'api_key')).geocode(location)

    return 'latitude: {lat:.4f}\nlongitude: {lng:.4f}'.format(
        **response[0].get('geometry').get('location'))
Example 20
def geodecoding(location):
    """This function inverse geocodes a string location.

    Parameters
    ----------
    location : iterator
        Lat, Long coordinates.

    Returns
    -------
    formatted_address : str
        Location.

    """
    response = googlemaps.Client(
        helpers.read_json('settings/maps_settings.json').get(
            'api_key')).reverse_geocode(
                (float(location[0]), float(location[1])))

    return response[0].get('formatted_address')
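Usage sketch for the two helpers above; both require a valid Google Maps API key in settings/maps_settings.json and network access, and the address and coordinates below are only illustrative.

# Forward geocoding: address -> "latitude: ...\nlongitude: ..." string
print(geocoding('1600 Amphitheatre Parkway, Mountain View, CA'))

# Reverse geocoding: (lat, lng) pair -> formatted address string
print(geodecoding(('37.4224', '-122.0842')))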
Example 21
    def test_sgolay_filter_dataset(self):
        """
        Test getting derivatives with the Savitzky-Golay filter.
        (Here we do not check the correctness of the filtering or of the derivative calculation.)
        """
        sess = groupby_session(self.train_dataset)
        sess = sgolay_filter_dataset(
            sess,
            **dict(
                read_json(
                    config.get("EyemovementClassification",
                               "filtering_params"))))

        self.assertEqual(len(sess),
                         np.sum(["velocity_sqrt" in s.columns for s in sess]))
        self.assertEqual(
            len(sess),
            np.sum(["stimulus_velocity" in s.columns for s in sess]))
        self.assertEqual(
            len(sess),
            np.sum(["acceleration_sqrt" in s.columns for s in sess]))
Example 22
    def __init_algorithm(self):
        """
        Creates an instance of the selected algorithm with the given parameters.
        :return: algorithm class object.
        """
        self._model_params = dict(
            read_json(config.get('EyemovementClassification', 'model_params')))
        self._algorithm_name = self._model_params.get('classifier_algorithm',
                                                      'ivdt')

        if self._algorithm_name not in implemented_algorithms:
            logger.error(
                f"Eye movements Classifier implements only the following algorithms: {implemented_algorithms}. "
                f"Given algorithm type {self._algorithm_name} is unrecognized.")
            raise NotImplementedError(
                f"Given algorithm type {self._algorithm_name} is unrecognized."
            )

        if self._algorithm_name == 'ivdt':
            self._algorithm = IVDT(
                saccade_min_velocity=self._model_params.get(
                    'saccade_min_velocity'),
                saccade_min_duration=self._model_params.get(
                    'min_saccade_duration_threshold'),
                saccade_max_duration=self._model_params.get(
                    'max_saccade_duration_threshold'),
                window_size=self._model_params.get('window_size'),
                dispersion_threshold=self._model_params.get(
                    'dispersion_threshold'))
        else:
            logger.error(
                f"Eye movements Classifier implements only the following algorithms: {implemented_algorithms}. "
                f"Given algorithm type {self._algorithm_name} is unrecognized.")
            raise NotImplementedError(
                f"Given algorithm type {self._algorithm_name} is unrecognized."
            )
Example 23
 def __init__(self):
     self._agg_parameters = dict(
         read_json(
             config.get("GazeVerification",
                        "verification_params"))).get('aggregation_params')
Example 24
# -*- coding: utf-8 -*-
import helpers
import telebot

import skills


friday = telebot.TeleBot(
    **helpers.read_json('settings/telegram_settings.json')
)

functions = skills.functions
arg_functions = skills.arg_functions
media_functions = skills.media_functions


@friday.message_handler(func=lambda message: True)
def message_handler(message):
    """This function gets the incoming message and calls the respective
    skill.

    Parameters
    ----------
    message : telebot.types.Message
        The message object.

    """
    payload = helpers.TelegramPayloadParser(message).__call__()
    message_text = payload.get('message').lower().split(' ')
    try:
        if len(message.text.split(' ')) > 1:
Example 25
import helpers
import json

# Take all input lists, process, and write back to file

fileIn = "../input_lists.json"

inputLists = helpers.read_json(fileIn)

print("Cleaning input lists...")

for aList in inputLists.keys():
	print("    " + aList)
	# Standardize as all lowercase
	listWIP = [x.lower() for x in inputLists[aList]]
	listWIP = list(set(listWIP))        # Remove duplicates
	listWIP.sort()                      # List in alphabetical order
	inputLists[aList] = listWIP

str_out = json.dumps(inputLists, indent=4, sort_keys=True)

with open(fileIn, "w") as fileout:
	fileout.write(str_out)

print("Input lists cleaned!")
Example 26
tags_parser = find_subparsers.add_parser('tags', help=txt['tags'])
tags_parser.add_argument('-ls',
                         '--list',
                         default=True,
                         action='store_true',
                         help=txt['tags.list'])
tags_parser.set_defaults(func=cli.fetch_tags)

# Create the command 'validate'
validate_parser = subparsers.add_parser('validate', help=txt['validate'])
validate_parser.add_argument('--file',
                             nargs='+',
                             help=txt['validate.file'],
                             type=FileType('r'))
validate_parser.set_defaults(func=cli.validate_note)

# Create the command 'submit'
submit_parser = subparsers.add_parser('submit', help=txt['submit'])
submit_parser.set_defaults(func=cli.submit_note)

if __name__ == '__main__':
    db = read_json('db/talks.json')

    args = parser.parse_args()

    if 'func' in args:
        args.func(db, args)
    else:
        # if the user calls the script without any command
        parser.print_help()
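The dispatch in the __main__ block relies on argparse's set_defaults(func=...): each subcommand stores its handler on the parsed namespace, and 'func' in args works because argparse.Namespace supports membership tests. A minimal self-contained sketch of the same pattern, with hypothetical command and handler names:

import argparse

def fetch_tags(db, args):
    # Toy handler; receives the loaded database and the parsed arguments.
    print("tags:", db.get("tags", []))

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()

tags_parser = subparsers.add_parser('tags')
tags_parser.set_defaults(func=fetch_tags)

args = parser.parse_args(['tags'])
if 'func' in args:
    args.func({"tags": ["python", "cli"]}, args)
else:
    parser.print_help()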
Example 27
import helpers
import json
import re

datfilepath = "../github-data/labReposInfo.json"
allData = {}

# Read input lists of organizations and independent repos of interest
inputLists = helpers.read_json("../input_lists.json")
orglist = inputLists["orgs"]
repolist = inputLists["repos"]

# Read pretty GraphQL queries
#   Org repos
query_in = helpers.read_gql("../queries/org-Repos-Info.gql")
#   Solo repos
query_solo_in = helpers.read_gql("../queries/repo-Info.gql")

# Retrieve authorization token
authhead = helpers.get_gitauth()

# Iterate through orgs of interest
print("Gathering data across multiple paginated queries...")
collective = {u'data': {}}
tab = "    "

for org in orglist:
	pageNum = 1
	print("\n'%s'" % (org))
	print(tab + "page %d" % (pageNum))
Example 29
from nba.player_stat import NbaPlayerStat
from nba.team_stat import NbaTeamStat
from nba.stat import NbaStat
from helpers import combine_stats, read_json


data = read_json("data/game.json")["data"]["attributes"]
away_team_stat = data["away_team"]["stat"]
home_team_stat = data["home_team"]["stat"]
away_players = data["away_players"]
home_players = data["home_players"]


def assert_ortg(advanced_stat, stat):
    assert abs(round(advanced_stat.ortg) - stat["ortg"]) <= 1


class TestOrtg:
    def test_team_ortg(self):
        away_team_advanced_stat = NbaTeamStat(away_team_stat, home_team_stat)
        assert_ortg(away_team_advanced_stat, away_team_stat)
        home_team_advanced_stat = NbaTeamStat(home_team_stat, away_team_stat)
        assert_ortg(home_team_advanced_stat, home_team_stat)

    def test_player_ortg(self):
        for player in away_players:
            player_stat = player["stat"]
            player_advanced_stat = NbaPlayerStat(
                player_stat, away_team_stat, home_team_stat
            )
            assert_ortg(player_advanced_stat, player_stat)
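A sketch of the nesting data/game.json appears to have, based on the keys accessed in the test above; only "ortg" is shown inside each stat dict, since the remaining stat fields are not visible in this excerpt.

# Placeholder numbers; only the key structure is inferred from the code above.
game_json_example = {
    "data": {
        "attributes": {
            "away_team": {"stat": {"ortg": 110.0}},
            "home_team": {"stat": {"ortg": 105.0}},
            "away_players": [{"stat": {"ortg": 112.0}}],
            "home_players": [{"stat": {"ortg": 103.0}}],
        }
    }
}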
# -*- coding: utf-8 -*-

import logging
import telebot
import config
from flask import Flask
from flask import request
from flask import abort
from helpers import read_json, get_keyboard

logging.basicConfig(filename="file.log", level=logging.INFO)
log = logging.getLogger("ex")

app = Flask(__name__)
API_TOKEN = config.TOKEN
bot = telebot.TeleBot(API_TOKEN)
storage = read_json('storage.json')


@app.route('/', methods=['POST', 'GET'])
def web_hook():
    if request.method == 'POST':
        if request.headers.get('content-type') == 'application/json':
            json_string = request.get_data().decode('utf-8')
            update = telebot.types.Update.de_json(json_string)
            bot.process_new_updates([update])
            return ''
        else:
            abort(403)
    return ''
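For the webhook above to receive updates, the bot has to be registered with Telegram; a minimal sketch using pyTelegramBotAPI's webhook helpers (the public HTTPS URL and port are placeholders):

if __name__ == '__main__':
    # Placeholder URL; Telegram requires a publicly reachable HTTPS endpoint.
    WEBHOOK_URL = 'https://example.com/'

    bot.remove_webhook()            # drop any previously registered webhook
    bot.set_webhook(url=WEBHOOK_URL)

    app.run(host='0.0.0.0', port=8443)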

import helpers
import json
import re

datfilepath = "../github-data/labRepos_CreationHistory.json"
allData = {}

# Check for and read existing data file
allData = helpers.read_existing(datfilepath)

# Read repo info data file (to use as repo list)
dataObj = helpers.read_json("../github-data/labReposInfo.json")

# Populate repo list
repolist = []
print("Getting internal repos ...")
repolist = sorted(dataObj["data"].keys())
print("Repo list complete. Found %d repos." % (len(repolist)))

# Read pretty GraphQL query
query_in = helpers.read_gql("../queries/repo-CreationDate.gql")

# Rest endpoint query
query_commits_in = "/repos/OWNNAME/REPONAME/commits?until=CREATETIME&per_page=100"
query_commits_in2 = "/repos/OWNNAME/REPONAME/commits?per_page=100"

# Retrieve authorization token
authhead = helpers.get_gitauth()

# Iterate through internal repos
print("Gathering data across multiple paginated queries...")
    if not os.path.isfile(path):
        print(path)
        # print("Computing clusters for " + kw)
        clus = helpers.apply_kmeans(res, kw)
        # print("Done computing clusters for " + kw)
        kw_meta_df = helpers.get_kw_df(meta_data_df, clus, kw)
        # feather.write_dataframe(kw_meta_df, path)
        kw_meta_df.to_pickle(path)
        return kw_meta_df
    return None


if __name__ == '__main__':
    datasets = []
    for file in os.listdir(os.path.join('Datasets')):
        if file.endswith(".json"):
            datasets.append(file)

    results = []
    for dataset in datasets:
        pool = mp.Pool(mp.cpu_count() - 1)
        path = os.path.join('Datasets', dataset)
        res, meta_data_df = helpers.read_json(path)
        date = re.findall(r'\d{4}-\d{2}-\d{2}', dataset)[0]
        print(date)
        for kw in meta_data_df.keyword.unique():
            results.append(pool.apply_async(cluster_df,
                                            args=(res, meta_data_df, kw, str(date))))
        pool.close()
        pool.join()
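Note that pool.apply_async returns AsyncResult handles rather than the computed values; a generic, self-contained sketch of the usual collect-with-.get() pattern (toy function, not the project's cluster_df):

import multiprocessing as mp

def square(x):
    return x * x

if __name__ == '__main__':
    with mp.Pool(2) as pool:
        handles = [pool.apply_async(square, args=(i,)) for i in range(4)]
        values = [h.get() for h in handles]  # .get() blocks until each task is done
    print(values)  # [0, 1, 4, 9]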
Example 33
    def _run_verification(self):

        verification_params = dict(read_json(config.get('GazeVerification', 'verification_params')))
        estimate_quality = bool(verification_params.get("estimate_quality", 0))

        # Initialize model
        if self._model is None:
            self._model = init_model(filename=config.get('GazeVerification', 'pretrained_model_fn'))
        else:
            print(self._model)

        # Creating dataset
        dataset = datasets.RunDataset(owner_path=config.get('DataPaths', 'owner_data'),
                                      others_path=config.get('DataPaths', 'run_data'),
                                      estimate_quality=estimate_quality)
        print("\nOwner:")
        self.__owner = dataset._owner
        print(dataset._owner)
        print("\nOthers users:")
        for user in dataset._others:
            print(user)

        owner_data = dataset.get_owner_data()
        others_data = dataset.get_others_data()
        if estimate_quality:
            others_data_targets = others_data.groupby(by=['session_id']).agg(
                {'session_target': lambda x: np.unique(x)[0],
                 'filename': lambda x: np.unique(x)[0]})
            others_data_targets = others_data_targets.to_dict('records')

        # Make eye movements classification and extract features
        owner_data = run_eyemovements_classification(owner_data, is_train=True, do_estimate_quality=True)
        owner_data = self._fgen.extract_features(owner_data, is_train=True, rescale=True)

        others_data = run_eyemovements_classification(others_data, is_train=True, do_estimate_quality=True)
        others_data = self._fgen.extract_features(others_data, is_train=True, rescale=True)

        print(f"Owner data: {owner_data.shape}")
        print(f"Others data: {others_data.shape}")

        self_threshold = self.__create_threshold(owner_data,
                                                 moves_threshold=verification_params.get("moves_threshold", 0.6),
                                                 default_threshold=verification_params.get("session_threshold", 0.5),
                                                 policy=verification_params.get("policy"))

        verification_results = {}
        for id, session in others_data.groupby(by='session_id'):
            session = session.reset_index(drop=True)
            (result, proba) = self.__evaluate_session(owner_data, session, estimate_quality=estimate_quality,
                                                      moves_threshold=verification_params.get("moves_threshold", 0.6),
                                                      session_threshold=self_threshold,
                                                      policy=verification_params.get("policy"))
            verification_results[id] = (result, proba)
        if estimate_quality:
            self.__print_results(self_threshold, verification_results, others_data_targets)
            self.__estimate_quality(verification_results, others_data_targets)
        else:
            self.__print_results(self_threshold, verification_results)

        return verification_results
Example 34
import helpers
import json
import re

datfilepath = "../github-data/labRepos_Topics.json"
allData = {}

# Read repo info data file (to use as repo list)
dataObj = helpers.read_json("../github-data/labReposInfo.json")

# Populate repo list
repolist = []
print("Getting internal repos ...")
repolist = sorted(dataObj["data"].keys())
print("Repo list complete. Found %d repos." % (len(repolist)))

# Read pretty GraphQL query
query_in = helpers.read_gql("../queries/repo-Topics.gql")

# Retrieve authorization token
authhead = helpers.get_gitauth()

# Iterate through internal repos
print("Gathering data across multiple paginated queries...")
collective = {u'data': {}}
tab = "    "

for repo in repolist:
    pageNum = 1
    print("\n'%s'" % (repo))
    print(tab + "page " + str(pageNum))
Example 35
        # f_s0 = save__path + '/' + fi[:-4] + r'0.png'
        # helpers.read_json1(f_n,f_s0)

        walls = helpers.read_json(f_n)
        f_s0 = save__path + '/' + fi[:-4] + r'0.png'
        helpers.draw_cle_test(walls, f_s0)
        an = run(walls)
        f_s1 = save__path + '/' + fi[:-4] + r'1.png'
        helpers.draw_cle_test(an, f_s1)


if __name__ == "__main__":
    # test()
    path = r'C:\Users\dyrs-ai-win10\Desktop\correct_wall\result'
    # path1 = r'C:\Users\dyrs-ai-win10\Documents\Tencent Files\673722621\FileRecv\outside'
    # test(path)
    name = r'\lianjia_fs_dongguanjiayuan_010.json'
    # name1 = r'\55.json'
    walls = helpers.read_json(path + name)
    helpers.draw_cle(walls)
    an = run(walls)
    helpers.draw_cle(an)

    # helpers.read_json1(path + name1)

    # remove_list = []
    #
    # file = path + name

    # helpers.draw_cle(remove_list,color='g')