Example #1
def train_feature(patches_fname,offsets_fname,f,haars_per_feat):
	patches,offsets = load(patches_fname),load(offsets_fname)
	haar_locs = load("haar_locs.pyz")
	x_regressors = boost(patches,offsets[:,0],haars_per_feat,haar_locs)
	y_regressors = boost(patches,offsets[:,1],haars_per_feat,haar_locs)
	pdump(x_regressors,open("x_regressors_%d.pyz"%f,'w'),-1)
	pdump(y_regressors,open("y_regressors_%d.pyz"%f,'w'),-1)
Example #2
 def fire(self, sources, values, trigger, etext):
     text = self.etext.get()
     process = 0
     if self.thetype == "IPs":
         if match(r'^((\d{1,2}|1\d{2}|2[0-4]\d|25[0-5])\.){3}(\d{1,2}|1\d{2}|2[0-4]\d|25[0-5])$', etext.get()):
             process += 1
     if self.thetype == "URLs":
         process += 1
     if self.thetype == "SPECs":
         process += 1
     if process >= 1:
         self.tbox["background"] = "White"
         self.tbox["foreground"] = "Black"
         counter = 0
         jobs_list = []
         if not (self.thetype == "SPECs"):
             for x in xrange(len(sources[:])):
                 if values[0][x].get():
                     thread = threading.Thread(target=self.TaskHandler, args=(x, ))
                     thread.setDaemon(True)
                     jobs_list.append(thread)
         else:
             thread = threading.Thread(target=self.TaskHandler, args=(values, ))
             thread.setDaemon(True)
             jobs_list.append(thread)
         for j in jobs_list:
             j.start()
         if not (self.thetype == "SPECs"):
             for line in open(self.read, 'r'):
                 if text in line:
                     counter += 1
             for line in open(self.blread, 'r'):
                 if text in line:
                     counter += 1
             if counter == 0:
                 with open(self.read, 'a') as z:
                     z.write(str(etext.get())+"\n")
     else:
         self.tbox["background"] = "Red"
         self.tbox["foreground"] = "white"
     export = []
     set_save = ""
     if self.thetype == "IPs":
         set_save = self.a.IPSettingsfile
         for x in xrange(len(self.a.ip.values)):
             if self.values[0][x].get() == 0:
                 export.append(0)
             else:
                 export.append(1)
         pdump(export, open(set_save, 'wb'))
     if self.thetype == "URLs":
         set_save = self.a.URLSettingsfile
         for x in xrange(len(self.a.url.values)):
             if self.values[0][x].get() == 0:
                 export.append(0)
             else:
                 export.append(1)
         pdump(export, open(set_save, 'wb'))
Example #3
 def showSettings(self, where):
     if self.showsettings.get() == 0:
         self.app.grid_forget()
         self.update()
         self.top.update()
         set_save=""
         export = []
         if self.thetype == "IPs":
             set_save = self.a.IPdimensions
         if self.thetype == "URLs":
             set_save = self.a.URLdimensions
         if self.thetype == "SPECs":
             set_save = self.a.SPECIALdimensions
         export.append(int(self.winfo_width()))
         export.append(int(self.winfo_height()))
         pdump( export, open(set_save, 'wb'))
     if self.showsettings.get() == 1:
         self.app.grid(column=4, row=where, sticky="NW")
         self.app.update()
         self.update()
         self.top.update()
         set_save = ""
         export = []
         if self.thetype == "IPs":
             set_save = self.a.IPdimensions
         if self.thetype == "URLs":
             set_save = self.a.URLdimensions
         if self.thetype == "SPECs":
             set_save = self.a.SPECIALdimensions
         export.append(int(self.winfo_width()))
         export.append(int(self.winfo_height()))
         pdump(export, open(set_save, 'wb'))
     if (path.getsize(self.a.IPdimensions) > 0) and (path.getsize(self.a.URLdimensions) > 0) and (path.getsize(self.a.SPECIALdimensions) > 0):
         self.update()
         IPs_dim = pload(open(self.a.IPdimensions, "rb"))
         URLs_dim = pload(open(self.a.URLdimensions, "rb"))
         SPECs_dim = pload(open(self.a.SPECIALdimensions, "rb"))
         theIPx = int(IPs_dim[0])
         theIPy = int(IPs_dim[1])
         theURLx = int(URLs_dim[0])
         theURLy = int(URLs_dim[1])
         theSPECx = int(SPECs_dim[0])
         theSPECy = int(SPECs_dim[1])
         newx = 0
         newy = theIPy + theURLy + theSPECy + 15
         if (theIPx > newx):
             newx = theIPx
         if (theURLx > newx):
             newx = theURLx
         if (theSPECx > newx):
             newx = theSPECx
         self.top.geometry(str(newx) + "x" + str(newy))
         self.top.update()
Example #4
def get_access_token(path_to_tok='./translate.tok'):
    if fexist(path_to_tok):
        ftok = open(path_to_tok, 'r+')
        tokdata = pload(ftok)
        expiretime = tokdata['expires_in']
        if datetime.now() < expiretime:  # cached token is still valid
            return tokdata['token']
        ftok.seek(0)  # expired: rewind so the refreshed token overwrites the stale record
        ftok.truncate()
    else:
        ftok = open(path_to_tok, 'w')
    args = {'client_id':clientid,'client_secret':clientse,'scope':'http://api.microsofttranslator.com/','grant_type':'client_credentials'}
    enc_args = urllib.urlencode(args)
    req = urllib2.Request(tok_url,enc_args)
    response = urllib2.urlopen(req)
    data = json.load(response)
    timeandten = datetime.now() + timedelta(minutes = 10)
    pdump({'token':data['access_token'],'expires_in':timeandten}, ftok)
    return data['access_token']
Example #5
def cache(target, args, identifier=None, cache_life=3 * 24 * 3600):
    ''' Run the target function with the given args, and store it to a pickled
    cache folder using the given identifier or the name of the function. The
    next time it is executed, the cached output is returned unless cache_life
    time expires. '''
    if identifier is None: identifier = target.__name__
    identifier = sub(r'[/\\\*;\[\]\':=,<>]', '_', identifier)
    path = join(PATH_RESOURCES, f'.pickled/{identifier}.pk')
    makedirs(dirname(path), exist_ok=True)
    now = time()
    if exists(path):
        with open(path, 'rb') as fp:
            save_time, value = pload(fp)
        if now - save_time <= cache_life:
            return value
    res = target(*args)
    with open(path, 'wb') as fp:
        pdump((now, res), fp, protocol=3)
    return res
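A minimal usage sketch for the cache helper above; the target function, its arguments and the identifier are hypothetical, and PATH_RESOURCES plus the pload/pdump aliases are assumed to be defined at module level as in the example.

def expensive_query(year):
    # stand-in for a slow computation whose result is worth caching
    return {"year": year, "rows": list(range(1000))}

# The first call runs expensive_query and pickles (timestamp, result) under the identifier;
# later calls within cache_life seconds return the pickled value instead of recomputing it.
rows = cache(expensive_query, (2020,), identifier="query_2020", cache_life=3600)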
Example #6
def rescue(identifier,
           function,
           arguments,
           path_data="data",
           path_cache=".pickled/%s.pk",
           cache_life=259200,
           sr=22500,
           window=45000,
           invalid=r"[/\\\*;\[\]\":=,<>]"):
    """ Caches the output of a function. """
    path = path_cache % sub(invalid, "_", identifier)
    makedirs(dirname(path), exist_ok=True)
    if exists(path):
        with open(path, "rb") as fp:
            save_time, rate, value, saved_window = pload(fp)
        if NOW - save_time <= cache_life and rate == sr and saved_window == window:
            return value
    res = function(*arguments)
    with open(path, "wb") as fp:
        pdump((NOW, sr, res, window), fp, protocol=3)
    return res
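A hedged usage sketch for rescue; the feature-extraction function and identifier are made up, and NOW plus the pickle aliases are assumed to exist at module level as implied above. The cached value is reused only while it is younger than cache_life and was written with the same sr and window values.

def extract_windows(path, rate, length):
    # stand-in for an expensive audio feature-extraction step
    return [[0.0] * length]

feats = rescue("track01_features", extract_windows, ("track01.wav", 22500, 45000),
               sr=22500, window=45000)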
Example #7
def step1_inputs_maker(path_input_file):

    file_name = os.path.basename(path_input_file)
    dir_work = os.path.dirname(path_input_file)
    id_ = file_name.split(".")[0]
    path_setting_file = os.path.join(dir_work, "{} - {}".format(id_, "prefs.p"))
    path_variable_file = os.path.join(dir_work, "{} - {}".format(id_, "args_main.p"))
    fire = standard_fire(np.arange(0, 3*60*60, 1), 273.15+20)
    inputs_extra = {"iso834_time": fire[0],
                    "iso834_temperature": fire[1],}
    # list_kwargs, dict_settings = mc_inputs_generator(dict_extra_variables_to_add=inputs_extra,
    #                                                  dir_file=path_input_file)
    df_args, dict_settings = mc_inputs_generator2(dict_extra_variables_to_add=inputs_extra,
                                                 dir_file=path_input_file)

    # Save list_kwargs as a pickle object.
    pdump(df_args, open(path_variable_file, "wb"))
    saveprint(os.path.basename(path_variable_file))

    pdump(dict_settings, open(path_setting_file, "wb"))
    saveprint(os.path.basename(path_setting_file))
Example #8
def loadAndCacheDbHTMLPages(databaseSiteBaseURL, databaseSiteHTMLDumpPath,
                            numPages):
    if path.exists(databaseSiteHTMLDumpPath):
        print("## Status: Found cached copy of Music 4 Dance website.")
        databaseSiteHTMLPages = pload(open(databaseSiteHTMLDumpPath, "rb"))
    else:
        # Make HTTP request
        try:
            databaseSiteHTMLPages = []
            for i in range(numPages):
                databaseSiteHTMLPages.append(
                    getHTML(databaseSiteBaseURL + "?page=%d" % (i + 1)).text)
        except:
            print("!! Error: Retrieving Music 4 Dance website unsuccessfull.")
            exit(0)
        else:
            print("## Status: Retrieved Music 4 Dance website.")

        # Save for later
        pdump(databaseSiteHTMLPages, open(databaseSiteHTMLDumpPath, "wb"))
        print("## Status: Cached copy of Music 4 Dance website for later.")

    return databaseSiteHTMLPages
Example #9
 def __alt_authorize(self):
     credentials = None
     if USE_SERVICE_ACCOUNTS and not self.alt_auth:
         self.alt_auth = True
         if ospath.exists(self.__G_DRIVE_TOKEN_FILE):
             LOGGER.info("Authorize with token.pickle")
             with open(self.__G_DRIVE_TOKEN_FILE, 'rb') as f:
                 credentials = pload(f)
             if credentials is None or not credentials.valid:
                 if credentials and credentials.expired and credentials.refresh_token:
                     credentials.refresh(Request())
                 else:
                     flow = InstalledAppFlow.from_client_secrets_file(
                         'credentials.json', self.__OAUTH_SCOPE)
                     LOGGER.info(flow)
                     credentials = flow.run_console(port=0)
                 # Save the credentials for the next run
                 with open(self.__G_DRIVE_TOKEN_FILE, 'wb') as token:
                     pdump(credentials, token)
             return build('drive',
                          'v3',
                          credentials=credentials,
                          cache_discovery=False)
     return None
Example #10
def load_mc10(study_dir,
              pre_time=0,
              segment=True,
              sync=True,
              save=True,
              save_loc=None,
              save_subj=False,
              start_time_0=True,
              return_data=True):
    """
    Load raw MC10 data from a study directory containing folders for each subject, as downloaded from the
    BioStamp RC web portal

    Parameters
    ---------
    study_dir : str
        Base study directory containing subject folders.
    pre_time : float, int, optional
        Amount of time in seconds to import before the start annotation.  Only applied if 'segment' is True.
        Default is 0 seconds.
    segment : bool, optional
        Segment the data based on annotations.  Defaults to True.
    sync : bool, optional
        Synchronize the timestamps for the inertial sensors.  Timestamps for sensors with the same sampling rates
        will be the same.  All sensors will start at the same time, regardless of sampling rate.
    save : bool, optional
        Whether or not to save (serialize) the imported data.  Defaults to True.
    save_loc : str, optional
        Where to save the data.  Options are None (save in import location), 'import' saves in import location, 'local'
        saves in the file location, or provide a file path where to save the data.  Defaults to None.
    save_subj : bool, optional
        Whether or not to save each subject as individual files.  If True, sets 'return_data' to False.
        Defaults to False.
    start_time_0 : bool, optional
        Start the timestamps at 0 time for each event, or the whole trial. Defaults to True.
    return_data : bool, optional
        Return the imported data as a dictionary (see Returns) if save is True.  If 'save' is False, then always returns
        the imported data.

    Returns
    -------
    save_paths : str, list, optional
        Path to saved data file(s).  If 'save_subj' is true, is a list of all the subject files, else it is a str for
        the path to the one saved file.
    data : dict, optional
        Loaded data.  This is returned if 'save' and 'return_data' are True, or 'save' is False.  Top down structure is
        'subject_id', 'sensor location', 'sensor_type', 'event id' (if segmenting)
    """
    if save:
        if save_subj:  # if splitting subjs, do not return any data
            return_data = False
            save_paths = []

        # Determine the file save location
        if save_loc is None or save_loc == 'import':
            save_path = study_dir
        elif save_loc == 'local':
            save_path = getcwd()
        elif isinstance(save_loc, str):
            if path.isdir(save_loc):
                save_loc_split = save_loc.split(sep)
                if save_loc_split[-1] == '':
                    save_path = sep.join(save_loc_split[:-1])
                else:
                    save_path = save_loc
            else:
                raise FileNotFoundError(f'No such directory: {save_loc}')
    else:
        save_subj = False
        return_data = True

    bn = len(study_dir.split(sep))  # base length of study folder

    subjs = [
        i for i in list(listdir(study_dir)) if path.isdir(study_dir + sep + i)
    ]
    # list all subfolders, which are subject IDs

    data = {i: dict() for i in subjs}  # allocate data storage for each subject

    for sub in subjs:
        print(f'{sub}\n ---------')
        wkd = walk(study_dir + sep + sub)  # walk down each subject folder
        temp = dict()
        for dname, _, fnames in wkd:
            if 'annotations.csv' in fnames and segment:
                # import annotations for data segmenting
                events, starts, stops = loadtxt(dname + sep +
                                                'annotations.csv',
                                                dtype='U35',
                                                delimiter='","',
                                                skiprows=1,
                                                usecols=(2, 4, 5),
                                                unpack=True)
                # checking for non-unique event names
                uniq, inds, cnts = unique(events,
                                          return_counts=True,
                                          return_inverse=True)
                if any(cnts > 1):  # if any events have more than 1 count
                    ecnts = full_like(cnts, 1)  # array to track which occurrence number to append next
                    for k in range(len(inds)):  # iterate over the events
                        if cnts[inds[k]] > 1:  # if this event has more than one occurrence
                            events[k] += f' {ecnts[inds[k]]}'  # append the chronological occurrence number to the name
                            ecnts[inds[k]] += 1  # increment the occurrence counter
            elif 'accel.csv' in fnames:
                sens_loc = dname.split(sep)[bn + 1]  # get the sensor location from the directory name
                temp[sens_loc] = dict()  # make a sub-dictionary for the sensor location
                for fname in fnames:  # can be multiple files per location (ie accel and gyro)
                    if 'errors' not in fname:  # don't want to do anything with *_error.csv files
                        # load data into data dictionary
                        print(sens_loc, fname)
                        temp[sens_loc][fname[:-4]] = loadtxt(dname + sep +
                                                             fname,
                                                             dtype=float,
                                                             delimiter=',',
                                                             skiprows=1)
        if sync:
            temp = _align_timestamps(temp)  # align time stamps of data

        if segment:
            # segment the data
            data[sub] = _segment_data(temp,
                                      starts,
                                      stops,
                                      events,
                                      pre_time=pre_time,
                                      zero_start=start_time_0)
        else:
            for loc in temp.keys():
                for typ in temp[loc].keys():
                    if start_time_0:
                        temp[loc][typ][:, 0] -= temp[loc][typ][0, 0]
            data[sub] = temp

        for loc in data[sub].keys():
            for typ in data[sub][loc].keys():
                for ev in data[sub][loc][typ].keys():
                    print(1 / mean(diff(data[sub][loc][typ][ev][:, 0])))
                    data[sub][loc][typ][ev][:, 0] /= 1000  # convert all timestamps to ms

        if save_subj:
            if save_loc is None or save_loc == 'import':
                fid = open(
                    save_path + sep + sub + sep + f'data_subj_{sub}.pickle',
                    'wb')
                save_paths.append(save_path + sep + sub + sep +
                                  f'data_subj_{sub}.pickle')
            else:
                fid = open(save_path + sep + f'data_subj_{sub}.pickle', 'wb')
                save_paths.append(save_path + sep + f'data_subj_{sub}.pickle')

            pdump(data[sub], fid)  # serialize the data
            fid.close()  # close the file

    if save and not save_subj:  # only write the combined file when saving is requested
        fid = open(save_path + sep + 'data.pickle', 'wb')
        save_paths = save_path + sep + 'data.pickle'
        pdump(data, fid)
        fid.close()

    if return_data and save:
        return save_paths, data
    elif save:
        return save_paths
    else:
        return data
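A hypothetical call to load_mc10; the study path is made up, and the subject, sensor-location and event keys come from whatever the study folder contains. Per the loops in the function above, the returned dictionary when segmenting is nested as subject -> sensor location -> sensor type -> event.

paths, data = load_mc10('/data/mc10_study', pre_time=5, segment=True, sync=True, save=True)
for sub, locations in data.items():
    for loc, sensors in locations.items():
        for typ, events in sensors.items():
            for ev, arr in events.items():
                print(sub, loc, typ, ev, arr.shape)  # arr: timestamp column followed by the sensor channels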
Example #11
ts=TimeSeries(f_hdf5)
print "hdf5 files are: ", f_hdf5
#cs= float(raw_input('enter cs in cm/sec: '))
#tB= float(raw_input('enter bondi time in sec: '))
for cnt,f in enumerate(f_hdf5):
    print "loading file: %s" % f
    pf= load(f)
    ts.Time[cnt]= pf.current_time
    cgrid= pf.h.covering_grid(level=lev,\
                          left_edge=pf.h.domain_left_edge,
                          dims=pf.domain_dimensions)  #approx from lev0
    alld= pf.h.all_data()  #yt exact
    gamma=1.001
    cs_iso= cs_ad/gamma**0.5
    dat= CovGrid_vs_AllDat(cgrid,alld,\
                           cs_iso,lev,pf.domain_width[0],pf.domain_dimensions[0])
    #make histograms    
    fsave=spath+"hist_"+pf.basename[5:9]+".png"
    #     Bmag=(bx**2+by**2+bz**2)**0.5
    #     beta= 8*np.pi*rho*cs_iso**2/Bmag**2
    #     MachA=(beta/2)**0.5*MachS
    plot_hgrams(fsave,dat)
    #save Qs of Interest
    save_vals(ts,cnt,dat)
fsave=spath+"tseries.png"
plot_TSeries(fsave,ts,tB)
fout=open(spath+"tseries.dump","w")
pdump((ts,cs_iso,tB),fout)
fout.close()
Example #12
def main(updater):
    dispatcher = updater.dispatcher

    dbFuncs.initDB()

    newList = ConversationHandler(
        entry_points=[
            CommandHandler('new', new, Filters.private),
            CommandHandler('start', start, Filters.private, pass_args=True)
        ],
        states={
            SETNAME: [MessageHandler(Filters.text & Filters.private, setName)]
        },
        fallbacks=[
            CommandHandler('cancel',
                           cancel,
                           Filters.private,
                           pass_user_data=True)
        ])

    listHandler = ConversationHandler(
        entry_points=[CallbackQueryHandler(pushInline)],
        states={SETTINGS: [CallbackQueryHandler(pushAdmin)]},
        fallbacks=[
            CallbackQueryHandler(pushInline),
            CommandHandler('cancel',
                           cancel,
                           Filters.private,
                           pass_user_data=True)
        ],
        per_message=True)

    dispatcher.add_handler(newList)
    dispatcher.add_handler(listHandler)
    dispatcher.add_handler(CallbackQueryHandler(pushInline))
    dispatcher.add_handler(
        CommandHandler('send', sendAll,
                       Filters.private & Filters.chat(chat_id=[114951690])))
    dispatcher.add_handler(CommandHandler('help', help, Filters.private))
    dispatcher.add_handler(CommandHandler('backup', backup, Filters.private))
    dispatcher.add_handler(InlineQueryHandler(inlineQuery))
    dispatcher.add_handler(ChosenInlineResultHandler(chosenQuery))
    dispatcher.add_handler(
        MessageHandler(Filters.private & Filters.regex(r'^\/.*'), blankCode))
    dispatcher.add_handler(
        MessageHandler(
            Filters.text & Filters.private & Filters.update.edited_message,
            editMessage))
    dispatcher.add_handler(
        MessageHandler(
            Filters.private & Filters.text & Filters.reply &
            (~Filters.update.edited_message), rcvReply))
    dispatcher.add_handler(
        MessageHandler(
            Filters.text & Filters.private & (~Filters.update.edited_message),
            rcvMessage))
    dispatcher.add_error_handler(contextCallback)

    try:
        with open('{0}/userdata'.format(backupsDir), 'rb') as file:
            dispatcher.user_data = pload(file)
    except Exception as e:
        logger.warning(repr(e))
    try:
        with open('{0}/newList'.format(backupsDir), 'rb') as file:
            newList.conversations = pload(file)
    except Exception as e:
        logger.warning(repr(e))
    try:
        with open('{0}/listHandler'.format(backupsDir), 'rb') as file:
            listHandler.conversations = pload(file)
    except Exception as e:
        logger.warning(repr(e))

    updater.start_polling()

    updater.idle()

    try:
        with open('{0}/userdata'.format(backupsDir), 'wb+') as file:
            pdump(dispatcher.user_data, file)
    except Exception as e:
        logger.warning(repr(e))
    try:
        with open('{0}/newList'.format(backupsDir), 'wb+') as file:
            pdump(newList.conversations, file)
    except Exception as e:
        logger.warning(repr(e))
    try:
        with open('{0}/listHandler'.format(backupsDir), 'wb+') as file:
            pdump(listHandler.conversations, file)
    except Exception as e:
        logger.warning(repr(e))
Example #13
def step2_main_calc(path_input_file, n_proc=0, progress_print_interval=1):

    dir_work = os.path.dirname(path_input_file)
    kwargs_file_name = os.path.basename(path_input_file)

    # Load kwargs
    list_kwargs = pload(open(path_input_file, "rb"))

    # Identify the id string
    id_ = kwargs_file_name.split(" - ")[0]

    # Check number of processes are to be used
    n_proc = os.cpu_count() if int(n_proc) < 1 else int(n_proc)

    # SIMULATION
    print("SIMULATION STARTS")
    print(("{}{}\n" * 2).format("Number of Threads:", n_proc,
                                "Total Simulations:", len(list_kwargs)))
    time_count_simulation = time.perf_counter()
    m = mp.Manager()
    q = m.Queue()
    p = mp.Pool(n_proc)
    jobs = p.map_async(worker, [(kwargs, q) for kwargs in list_kwargs])
    count_total_simulations = len(list_kwargs)
    while progress_print_interval:
        if jobs.ready():
            break
        else:
            print("SIMULATION COMPLETE {:3.0f} %...".format(
                q.qsize() * 100 / count_total_simulations))
            time.sleep(progress_print_interval)
    results = jobs.get()
    time_count_simulation = time.perf_counter() - time_count_simulation
    print("SIMULATION COMPLETE IN {:0.3f} SECONDS".format(
        time_count_simulation, progress_print_interval))

    # POST PROCESS
    print("POST PROCESSING STARTS")
    # format outputs
    results = np.array(results, dtype=float)
    df_outputs = pd.DataFrame({
        "TIME EQUIVALENCE [min]": results[:, 0] / 60.,
        "SEEK STATUS [bool]": results[:, 1],
        "WINDOW OPEN FRACTION [%]": results[:, 2],
        "FIRE LOAD DENSITY [MJ/m2]": results[:, 3],
        "FIRE SPREAD SPEED [m/s]": results[:, 4],
        "BEAM POSITION [m]": results[:, 5],
        "MAX. NEAR FIELD TEMPERATURE [C]": results[:, 6],
        "FIRE TYPE [0:P., 1:T.]": results[:, 7],
        "PEAK STEEL TEMPERATURE [C]": results[:, 8] - 273.15,
        "PROTECTION THICKNESS [m]": results[:, 9],
        "SEEK ITERATIONS": results[:, 10]
    })
    df_outputs = df_outputs[[
        "TIME EQUIVALENCE [min]", "PEAK STEEL TEMPERATURE [C]",
        "PROTECTION THICKNESS [m]", "SEEK STATUS [bool]",
        "WINDOW OPEN FRACTION [%]", "FIRE LOAD DENSITY [MJ/m2]",
        "FIRE SPREAD SPEED [m/s]", "BEAM POSITION [m]",
        "MAX. NEAR FIELD TEMPERATURE [C]", "FIRE TYPE [0:P., 1:T.]"
    ]]
    df_outputs.sort_values(by=["TIME EQUIVALENCE [min]"], inplace=True)
    df_outputs.reset_index(drop=True, inplace=True)

    path_results_file = os.path.join(dir_work,
                                     "{} - {}".format(id_, "res_df.p"))
    pdump(df_outputs, open(path_results_file, "wb"))
Example #14
 def rec_dump(obj, file):
     if (isinstance(obj, SerializableObject)):
         obj.serialize(file)
     elif (isinstance(obj, Expression)):
         pdump(str(obj), file)
     elif (isinstance(obj, list) or isinstance(obj, tuple)):
         pdump(InitSequenceSerializable, file)
         for el in obj:
             pdump(get_class(el), file)
             rec_dump(el, file)
         pdump(FinishSequenceSerializable, file)
     elif (isinstance(obj, dict)):
         pdump(InitSequenceSerializable, file)
         for key in obj:
             pdump(get_class(key), file)
             rec_dump(key, file)
             pdump(get_class(obj[key]), file)
             rec_dump(obj[key], file)
         pdump(FinishSequenceSerializable, file)
     else:
         pdump(obj, file)
Example #15
def save_game():
    global SAVE_DATA
    if SAVE_DATA is None:
        SAVE_DATA = blank_save()
    with open(SAVE_FILE_PATH, 'wb') as f:
        pdump(SAVE_DATA, f)
Example #16
def build_people_model(host, port, **kwargs):
    global PROD_COMO
    ppl_model_data = 'ppl_model_data.pickle'
    batch_size = kwargs.get('batch_size', 10000)
    vocabulary = get_ingredient_vocabulary(host, port)

    # The tfidf_vect will ignore the following words
    stop_words = [
        '',
        'water',
        'glycerin',
        'titanium dioxide',
        'iron oxides',
        'beeswax',
        'methylparaben',
        'propylparaben',
        'propylene glycol',
        'panthenol',
        'mica']

    # Create vectorizers
    d_vect = DictVectorizer(sparse=False)
    tfidf_vect = TfidfVectorizer(
        tokenizer=get_ingredients_as_list,
        lowercase=False,
        stop_words=stop_words,
        vocabulary=vocabulary)

    print("Loading people from database, batch_size:", str(batch_size))
    ppl_filt = {}
    ppl_prjctn = {
        '_id': False,
        'race': True,
        'birth_sex': True,
        'age': True,
        'acne': True,
        'skin': True,
        'acne_products': True}  # Don't include any PII
    db_objects = PEOPLE_DB.read(ppl_filt, projection=ppl_prjctn)

    y, demo_mult = [], []
    batch_num, pulled = 0, 0
    X = None

    # Work in batches to build dataset
    while pulled <= db_objects.count(with_limit_and_skip=True):
        # Initialize
        X_demo_lst, X_prod_lst = [], []
        people = []

        print('Parsing batch:', batch_num)

        try:
            # Build a batch
            for i in range(batch_size):
                people.append(DB_Object.build_from_dict(db_objects.next()))
                pulled += 1
        except StopIteration:
            # End of available data
            break

        # Extract features
        for person in people:
            # Create new entry for each product
            # Note: Model is only applicable to entries with products
            for product_id in person.pop('acne_products'):
                # Pull product ingredients info
                X_prod_lst.append([product_id])

                # Pull demographic info
                X_demo_lst.append(person)

                # Generate demographic multiplier
                mult = get_multiplier(person)
                demo_mult.append(mult)

        # Vectorize data
        X_demo = d_vect.fit_transform(X_demo_lst)  # X_demo is now a numpy array
        X_prod = tfidf_vect.fit_transform(X_prod_lst)  # X_prod is now a CSR sparse matrix

        # Add batch result to output matrix
        if X is not None:
            X_t = hstack([csr_matrix(X_demo), X_prod], format="csr")
            try:
                X = vstack([X, X_t], format="csr")
            except ValueError:
                break
        else:
            # Initialize X
            X = hstack([csr_matrix(X_demo), X_prod], format="csr")

        batch_num += 1

    for como, mult in zip(PROD_COMO, demo_mult):
        val = como * mult
        if val < 6:
            y.append(0)
        elif val < 12:
            y.append(1)
        else:
            y.append(2)

    print('Storing vectorized data and training labels')
    # Bundle the matrices and fitted vectorizers into a single model dict
    model = {
        'X': X,
        'y': y,
        'd_vect': d_vect,
        'tfidf_vect': tfidf_vect,
        'vocabulary': vocabulary
    }

    print("Saving model data to disk for next time")
    # Insert the model into the model database
    MODEL_DB.create_file(pdumps(model, protocol=2), filename="ml_people_data")
    # Save model data to disk
    with open(ppl_model_data, "wb") as pickle_out:
        pdump(model, pickle_out)
    print('[SUCCESS] People model data post-processed and stored')
Example #17
def step2_calc(df_input,
               dict_pref,
               path_input_file,
               progress_print_interval=5):
    # LOCAL SETTINGS
    # ==============

    # To limit memory usage when multiprocessing is employed, a maximum number of tasks is defined for a single process.
    # A worker process is recycled once it reaches this limit, so it cannot accumulate data beyond it.

    mp_maxtasksperchild = 1000

    # Load kwargs

    dict_input_kwargs = df_input.to_dict(orient="index")
    list_kwargs = []
    for key, val in dict_input_kwargs.items():
        val["index"] = key
        list_kwargs.append(val)

    # Load settings

    dict_settings = dict_pref
    n_proc = dict_settings["n_proc"]

    # Check number of processes are to be used

    n_proc = os.cpu_count() if int(n_proc) < 1 else int(n_proc)

    # SIMULATION START

    print(
        __strformat_1_1.format("Input file:",
                               os.path.basename(path_input_file)))
    print(__strformat_1_1.format("Total simulations:", len(list_kwargs)))
    print(__strformat_1_1.format("Number of threads:", n_proc))

    time_simulation_start = time.perf_counter()
    m = mp.Manager()
    q = m.Queue()
    p = mp.Pool(n_proc, maxtasksperchild=mp_maxtasksperchild)
    jobs = p.map_async(calc_time_equiv_worker,
                       [(kwargs, q) for kwargs in list_kwargs])
    count_total_simulations = len(list_kwargs)
    n_steps = 24  # length of the progress bar
    while progress_print_interval:
        if jobs.ready():
            time_simulation_consumed = time.perf_counter() - time_simulation_start
            print("{}{} {:.1f}s".format('█' * round(n_steps), '-' * round(0),
                                        time_simulation_consumed))
            break
        else:
            p_ = q.qsize() / count_total_simulations * n_steps
            print("{}{} {:03.1f}%".format('█' * int(round(p_)),
                                          '-' * int(n_steps - round(p_)),
                                          p_ / n_steps * 100),
                  end='\r')
            time.sleep(1)
    p.close()
    p.join()
    results = jobs.get()

    # format outputs

    results = np.array(results)

    df_output = pd.DataFrame({
        'TIME STEP [s]': results[:, 0],
        'TIME START [s]': results[:, 1],
        'TIME LIMITING []': results[:, 2],
        'WINDOW HEIGHT [m]': results[:, 3],
        'WINDOW WIDTH [m]': results[:, 4],
        'WINDOW OPEN FRACTION []': results[:, 5],
        'ROOM BREADTH [m]': results[:, 6],
        'ROOM DEPTH [m]': results[:, 7],
        'ROOM HEIGHT [m]': results[:, 8],
        'ROOM WALL THERMAL INERTIA [J/m2s1/2K]': results[:, 9],
        'FIRE LOAD DENSITY [MJ/m2]': results[:, 10],
        'FIRE HRR DENSITY [MW/m2]': results[:, 11],
        'FIRE SPREAD SPEED [m/s]': results[:, 12],
        'FIRE DURATION [s]': results[:, 13],
        'BEAM POSITION [m]': results[:, 14],
        'BEAM RHO [kg/m3]': results[:, 15],
        'BEAM C [-]': results[:, 16],
        'BEAM CROSS-SECTION AREA [m2]': results[:, 17],
        'BEAM FAILURE TEMPERATURE [C]': results[:, 18],
        'PROTECTION K [W/m/K]': results[:, 19],
        'PROTECTION RHO [kg/m3]': results[:, 20],
        'PROTECTION C OBJECT []': results[:, 21],
        'PROTECTION THICKNESS [m]': results[:, 22],
        'PROTECTION PERIMETER [m]': results[:, 23],
        'ISO834 TIME ARRAY [s]': results[:, 24],
        'ISO834 TEMPERATURE ARRAY [K]': results[:, 25],
        'MAX. NEAR FIELD TEMPERATURE [C]': results[:, 26],
        'SEEK ITERATION LIMIT []': results[:, 27],
        'SEEK PROTECTION THICKNESS UPPER BOUND [m]': results[:, 28],
        'SEEK PROTECTION THICKNESS LOWER BOUND [m]': results[:, 29],
        'SEEK BEAM FAILURE TEMPERATURE TOLERANCE [K]': results[:, 30],
        'INDEX': results[:, 31],
        'TIME EQUIVALENCE [s]': results[:, 32],
        'SEEK STATUS [0:Fail, 1:Success]': results[:, 33],
        'FIRE TYPE [0:P, 1:T]': results[:, 34],
        'SOUGHT BEAM TEMPERATURE [K]': results[:, 35],
        'SOUGHT BEAM PROTECTION THICKNESS [m]': results[:, 36],
        'SOUGHT ITERATIONS []': results[:, 37],
        'BEAM TEMPERATURE TO FIXED PROTECTION THICKNESS [K]': results[:, 38],
        'FIRE TIME ARRAY [s]': results[:, 39],
        'FIRE TEMPERATURE ARRAY [K]': results[:, 40],
        'OPENING FACTOR [m0.5]': results[:, 41]
    })

    df_output.set_index("INDEX", inplace=True)  # assign 'INDEX' column as DataFrame index

    df_output.sort_values('TIME EQUIVALENCE [s]', inplace=True)  # sort based on time equivalence

    path_results_file = os.path.join(
        os.path.dirname(path_input_file), "{} - {}".format(
            os.path.basename(path_input_file).split('.')[0], __fn_output))
    pdump(df_output, open(path_results_file, "wb"))

    return df_output
Example #18
def step2_main_calc(path_input_file, progress_print_interval=5):
    # Settings (local)
    mp_maxtasksperchild = 1000


    # Make prefix, suffix, file and directory strings
    dir_work = os.path.dirname(path_input_file)
    name_kwargs_file = os.path.basename(path_input_file)
    id_ = name_kwargs_file.split(" - ")[0]

    # Load kwargs
    df_input_kwargs = pload(open(path_input_file, "rb"))
    dict_input_kwargs = df_input_kwargs.to_dict(orient="index")
    list_kwargs = []
    for key, val in dict_input_kwargs.items():
        val["index"] = key
        list_kwargs.append(val)
    # list_kwargs = [val for key, val in dict_input_kwargs.items()]

    # Load settings
    path_settings_file = os.path.join(dir_work, "{} - {}".format(id_, "prefs.p"))
    dict_settings = pload(open(path_settings_file, "rb"))
    n_proc = dict_settings["n_proc"]

    # Check number of processes are to be used
    n_proc = os.cpu_count() if int(n_proc) < 1 else int(n_proc)

    # Work out sleep time between each check print progress
    progress_print_sleep = len(list_kwargs) / 200
    if progress_print_sleep < 0.5:
        progress_print_sleep = 0.5
    elif progress_print_sleep > 500:
        progress_print_sleep = 500

    # SIMULATION
    print(strformat_1_1.format("Input file:", id_))
    print(strformat_1_1.format("Total simulations:", len(list_kwargs)))
    print(strformat_1_1.format("Number of threads:", n_proc))

    time_count_simulation = time.perf_counter()
    m = mp.Manager()
    q = m.Queue()
    p = mp.Pool(n_proc, maxtasksperchild=mp_maxtasksperchild)
    jobs = p.map_async(calc_time_equiv_worker, [(kwargs, q) for kwargs in list_kwargs])
    count_total_simulations = len(list_kwargs)
    progress_now = - progress_print_interval
    while progress_print_interval:
        if jobs.ready():
            break
        else:
            progress_now_ = int(q.qsize() * 100 / count_total_simulations)
            if progress_now_ >= (progress_now + progress_print_interval):
                progress_now = int(progress_now_/progress_print_interval) * progress_print_interval
                print(strformat_1_1_1.format("Simulation progress:", str(progress_now), "%"))
            time.sleep(progress_print_sleep)
    p.close()
    p.join()
    results = jobs.get()
    time_count_simulation = time.perf_counter() - time_count_simulation
    print(strformat_1_1_1.format("Simulation completed in:", str(int(time_count_simulation)), "s"))

    # format outputs
    results = np.array(results, dtype=float)
    df_outputs = pd.DataFrame({"TIME EQUIVALENCE [min]": results[:, 0]/60.,
                               "SEEK STATUS [bool]": results[:, 1],
                               "WINDOW OPEN FRACTION [%]": results[:, 2],
                               "FIRE LOAD DENSITY [MJ/m2]": results[:, 3],
                               "FIRE SPREAD SPEED [m/s]": results[:, 4],
                               "BEAM POSITION [m]": results[:, 5],
                               "MAX. NEAR FIELD TEMPERATURE [C]": results[:, 6],
                               "FIRE TYPE [0:P., 1:T.]": results[:, 7],
                               "PEAK STEEL TEMPERATURE TO GOAL SEEK [C]": results[:, 8]-273.15,
                               "PROTECTION THICKNESS [m]": results[:, 9],
                               "SEEK ITERATIONS [-]": results[:, 10],
                               "PEAK STEEL TEMPERATURE TO FIXED PROTECTION [C]": np.sort(results[:, 11])-273.15,
                               "INDEX": results[:, 12]})
    df_outputs = df_outputs[[
        "TIME EQUIVALENCE [min]", "PEAK STEEL TEMPERATURE TO GOAL SEEK [C]", "PROTECTION THICKNESS [m]",
        "SEEK STATUS [bool]", "SEEK ITERATIONS [-]", "WINDOW OPEN FRACTION [%]", "FIRE LOAD DENSITY [MJ/m2]",
        "FIRE SPREAD SPEED [m/s]", "BEAM POSITION [m]", "MAX. NEAR FIELD TEMPERATURE [C]",
        "FIRE TYPE [0:P., 1:T.]", "PEAK STEEL TEMPERATURE TO FIXED PROTECTION [C]", "INDEX"]]
    df_outputs.set_index("INDEX", inplace=True)
    # df_outputs.sort_values(by=["TIME EQUIVALENCE [min]"], inplace=True)
    # df_outputs.reset_index(drop=True, inplace=True)

    path_results_file = os.path.join(dir_work, "{} - {}".format(id_, "res_df.p"))
    pdump(df_outputs, open(path_results_file, "wb"))
    saveprint(os.path.basename(path_results_file))
Example #19

# check the install directory and reset the serialisation files
# redefine this to include a check of where the eqparse package is installed
#from eqparse import __eqp_dir, __eqp_memory_file, __eqp_memory_file_comb, __eqp_mac_address as mac_address
from eqparse import __eqp_dir, __eqp_memory_file, __eqp_memory_file_comb, __eqp_mac_address as mac_address
from pickle import dump as pdump
from pickle import load as pload

for file_name in [__eqp_memory_file, __eqp_memory_file_comb]:
	file = open(file_name,'wb')
	file.truncate()
	file.close()
	fprop = {};
	pdump(fprop,open(file_name,'wb'))

#file = open(__eqp_memory_file,'wb')
#file.truncate()
#file.close()
#fprop = {};
#pdump(fprop,open(__eqp_memory_file,'wb'))


#file = open(__eqp_memory_file_comb,'wb')
#file.truncate()
#fcomb = {}
#file.close()
#pdump(fcomb,open(__eqp_memory_file_comb,'wb'))

 
Example #20
 def __init__(self):
     super(DistributedTrainer, self).__init__()
     pdump(self.haar_locs, open("haar_locs.pyz", 'w'), -1)
Example #21
	def __init__(self):
		super(DistributedTrainer,self).__init__()
		pdump(self.haar_locs,open("haar_locs.pyz",'w'),-1)
Example #22
			
			else:
				phrase=ligne
		
		
			arbre= evaluation.readtree(evaluation.tokenize(phrase))[0]
			n,t = evaluation.nodesandleaves(arbre)
			nonterminaux.update(n)
			terminaux.update(t)
			
			productions=evaluation.getchildren(arbre)
		
			for elem in productions:
				leftside[elem] += len(productions[elem])
				for prod in productions[elem]:
					rightside[elem][prod] += 1
					
	for nt in rightside:
		sumproba=0
		for prod in rightside[nt]:
			prodproba=fractions.Fraction(rightside[nt][prod],leftside[nt])
			rightside[nt][prod]=prodproba
			sumproba += prodproba
		assert(sumproba==1)

	grammaire=CNF(terminaux, nonterminaux,rightside)
	
	with open(args.output,"wb") as f:
		pdump(grammaire,f)
	
Example #23
	sink.glob_meanRho= interp_coarse_to_fine(glob.time,glob.meanRho, sink.time)
	#get local rmsV,rmsVa,meanRho
	data=np.loadtxt(args.rms_local_global+'local_rmsV_rmsVa.txt', dtype=np.string_)
	sort_ind= np.argsort(data[:,1]) #sorted index for time to increase monotonically
	sid_ind= np.arange(2,data.shape[1]-1,4) #index of each sink id, starts at 2, then every 4th index after that
	#get object to hold local rmsV,etc, data
	n_hdf5_files= data.shape[0]
	local= LocalData(args.NSinks,n_hdf5_files,npts+1)
	#get LocalData from data
	local.time= data[:,1][sort_ind].astype(np.float128)
	#local data from each sink
	for cnt,i in enumerate(sid_ind):
		print "cnt,i= ",cnt,i
		local.sid[cnt]= data[:,i][0] #redundant, but keeps writing the correct sid
		print "sid[cnt],data[:,i][0]=",local.sid[cnt],data[:,i][0]
		local.rmsV[:,cnt]= data[:,i+1][sort_ind].astype(np.float128) #i+1 is ith sink's rmsV at all times
		local.rmsVa[:,cnt]= data[:,i+2][sort_ind].astype(np.float128) 
		local.meanRho[:,cnt]= data[:,i+3][sort_ind].astype(np.float128) 
	#interpolate
	for i in range(args.NSinks):
		sink.rmsV[:,i]= interp_coarse_to_fine(local.time,local.rmsV[:,i], sink.time)
		sink.rmsVa[:,i]= interp_coarse_to_fine(local.time,local.rmsVa[:,i], sink.time)
		sink.meanRho[:,i]= interp_coarse_to_fine(local.time,local.meanRho[:,i], sink.time)
#save data 
print "saving data"
if args.rms_local_global: name="sinks_yesRms.pickle"
else: name= "sinks_noRms.pickle"
fout=open(os.path.join(args.path,name),"w")
pdump(sink,fout)
fout.close()
Example #24
def main():
    args = parse_args()

    texts_mapping = jload(open(args.mapping_path))

    # for cross-lingual vectorization, the direction and the path to the shared vector matrix must be specified
    lang_required = {'cross': ['direction', 'common_output_vectors_path']}
    check_args(args, 'lang', lang_required)

    # cross-lingual vectorization
    if args.lang == 'cross':
        model_required = {'model': ['src_embeddings_path', 'tar_embeddings_path'],
                          'translation': ['tar_embeddings_path', 'bidict_path'],
                          'projection': ['src_embeddings_path', 'tar_embeddings_path', 'projection_path']
                          }
        check_args(args, 'method', model_required)

        if args.method == 'translation':
            args.src_embeddings_path = args.tar_embeddings_path

        directions = {d: lang for d, lang in zip(['src', 'tar'], args.direction.split('-'))}
        print(directions)

        print('Vectorizing src')
        src_vectorized = main_onelang('src', directions['src'], texts_mapping,
                                      args.src_lemmatized_path, args.src_embeddings_path,
                                      args.src_output_vectors_path, args.method, args.no_duplicates,
                                      args.projection_path, args.bidict_path, args.forced)
        # print(src_vectorized)
        print('Vectorizing tar')
        tar_vectorized = main_onelang('tar', directions['tar'], texts_mapping,
                                      args.tar_lemmatized_path, args.tar_embeddings_path,
                                      args.tar_output_vectors_path, args.method, args.no_duplicates,
                                      args.projection_path, args.bidict_path, args.forced)
        # print(tar_vectorized)

        # assemble the shared matrix and the shared mapping
        common_len = len(src_vectorized) + len(tar_vectorized)
        emb_dim = src_vectorized.shape[1]
        common_vectorized = np.zeros((common_len, emb_dim))
        print(common_vectorized.shape)

        common2i = {}
        i2common = {}

        common_vectorized, common2i, i2common = to_common(texts_mapping, common2i, i2common,
                                                          common_vectorized, tar_vectorized,
                                                          directions['tar'], start_from=0)
        common_vectorized, common2i, i2common = to_common(texts_mapping, common2i, i2common,
                                                          common_vectorized, src_vectorized,
                                                          directions['src'],
                                                          start_from=len(tar_vectorized))

        pdump(common_vectorized, open(args.common_output_vectors_path, 'wb'))

        texts_mapping['cross2i'] = common2i
        texts_mapping['i2cross'] = i2common
        jdump(texts_mapping, open(args.mapping_path, 'w', encoding='utf-8'))

        # print(i2common)
        # print(common2i)

    # vectorize a monolingual corpus (without building a shared vector matrix and a shared mapping)
    else:
        model_required = {'model': ['src_embeddings_path'],
                          'translation': ['tar_embeddings_path', 'bidict_path'],
                          'projection': ['src_embeddings_path', 'tar_embeddings_path',
                                         'projection_path']
                          }
        check_args(args, 'method', model_required)

        if args.method == 'translation':
            args.src_embeddings_path = args.tar_embeddings_path

        print('Vectorizing the corpus')
        src_vectorized = main_onelang('src', args.lang, texts_mapping,
                                      args.src_lemmatized_path,
                                      args.src_embeddings_path, args.src_output_vectors_path,
                                      args.method, args.no_duplicates, args.projection_path,
                                      args.bidict_path, args.forced)
Example #25
    def serialize(self, file):
        r'''
            Method to dump an object into a file.

            This method writes an object to a file, opening the file first if it is given as a string.

            It relies on the ``pickle`` package for objects that are not ``SerializableObject`` instances; otherwise it
            uses a recursive serialization procedure to write the whole object.
        '''
        from pickle import dump as pdump
        from sage.all import Expression

        ## Auxiliary method for the recursive dumping procedure
        def rec_dump(obj, file):
            if (isinstance(obj, SerializableObject)):
                obj.serialize(file)
            elif (isinstance(obj, Expression)):
                pdump(str(obj), file)
            elif (isinstance(obj, list) or isinstance(obj, tuple)):
                pdump(InitSequenceSerializable, file)
                for el in obj:
                    pdump(get_class(el), file)
                    rec_dump(el, file)
                pdump(FinishSequenceSerializable, file)
            elif (isinstance(obj, dict)):
                pdump(InitSequenceSerializable, file)
                for key in obj:
                    pdump(get_class(key), file)
                    rec_dump(key, file)
                    pdump(get_class(obj[key]), file)
                    rec_dump(obj[key], file)
                pdump(FinishSequenceSerializable, file)
            else:
                pdump(obj, file)

        def get_class(obj):
            if (obj is None):
                return None
            return obj.__class__

        # Checking the file is opened
        is_str = (type(file) == str)
        if (is_str): file = open(file, "wb+")

        # Serializing the list of types for args
        pdump([get_class(obj) for obj in self.sargs()], file)

        # Serializing the arguments
        for obj in self.sargs():
            rec_dump(obj, file)

        # Serializing the list of named arguments
        pdump([(key, get_class(self.skwds()[key])) for key in self.skwds()],
              file)

        # Serializing the named arguments
        for key in self.skwds():
            rec_dump(self.skwds()[key], file)

        # Closing the file if we opened it
        if (is_str): file.close()
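A hedged usage sketch for serialize; MyModel stands in for a hypothetical SerializableObject subclass from this package, and sargs()/skwds() are assumed to expose its positional and named constructor arguments as used above. The method accepts either a path string (which it opens and closes itself) or an already-open binary file.

# obj = MyModel(3, expr, name="example")   # hypothetical subclass; sargs() -> (3, expr), skwds() -> {"name": "example"}
# obj.serialize("my_model.srl")            # path given as a string, so serialize opens and closes the file
# with open("other.srl", "wb+") as fh:     # an already-open binary file also works
#     obj.serialize(fh)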
Example #26
    i += size

times = comm.gather(times, root=0)
if rank == 0:
    all_times = {}
    for d in times:
        for k, v in d.iteritems():
            all_times.setdefault(k, []).append(v)

phi_mean = comm.gather(phi_mean, root=0)
if rank == 0:
    all_phi_mean = {}
    for d in phi_mean:
        for k, v in d.iteritems():
            all_phi_mean.setdefault(k, []).append(v)

phi_median = comm.gather(phi_median, root=0)
if rank == 0:
    all_phi_median = {}
    for d in phi_median:
        for k, v in d.iteritems():
            all_phi_median.setdefault(k, []).append(v)

# save data
if rank == 0:
    name = spath + "K06_Table1_logrBL_%.1f_M0_%.1f.pickle" % (log_rB_L, M0i)
    fout = open(name, "w")
    pdump((times, phi_mean, phi_median), fout)
    fout.close()
    print "gathered and pickle dumped data"
Example #27
def main_onelang(direction, lang, texts_mapping, lemmatized_path,
                 embeddings_path, output_vectors_path, method, no_duplicates,
                 projection_path, bidict_path, forced):
    i2lang = 'i2{}'.format(lang)

    # collect the lemmatized texts from the lemmatized file
    if not os.path.isfile(lemmatized_path):  # nothing from this corpus has been parsed yet
        raise NotLemmatizedError()

    else:  # some files have already been parsed
        lemmatized_dict = jload(open(lemmatized_path, encoding='utf-8'))
        print('Got it, vectorizing now.')

        # load the previously vectorized texts, if any
        old_vectorized = load_vectorized(output_vectors_path, forced)

        # check whether new text ids have appeared in the mapping
        n_new_texts = len(texts_mapping[i2lang]) - len(old_vectorized)
        print('New texts: {}'.format(n_new_texts))

        if not n_new_texts:  # no new texts were found
            return old_vectorized

        else:
            # gather everything, lemmatized and not yet lemmatized
            lemmatized_corpus = get_lemmatized_corpus(texts_mapping, i2lang,
                                                      lemmatized_dict,
                                                      n_new_texts)
            # for i in lemmatized_corpus:
            #     print(i)

            # for tar we always load the model version
            vectorizer = build_vectorizer(direction, method, embeddings_path,
                                          no_duplicates, projection_path,
                                          bidict_path)

            # take the mapping length as the size of the new corpus
            new_vectorized = np.zeros(
                (len(texts_mapping[i2lang]), vectorizer.dim))

            # fill in the old rows, if there were any
            for nr, line in enumerate(old_vectorized):
                new_vectorized[nr, :] = line
            # print(new_vectorized)
            # print(new_vectorized.shape)

            new_vectorized, not_vectorized = vectorize_corpus(
                lemmatized_corpus,
                new_vectorized,
                vectorizer,
                starts_from=len(old_vectorized))

            if output_vectors_path:
                pdump(new_vectorized, open(output_vectors_path, 'wb'))

            if not_vectorized:
                print('Failed to vectorize the following texts:\n{}'.format(
                    '\t'.join(not_vectorized)))

            return new_vectorized