def _get_informativity(informativity, ngrams, N):
    print 'Calculating informativity...'

    # calculate doc-level informativity
    for doc_id, sig_dict in informativity['doc'].iteritems():
        sig_dict = dict(sig_dict)  # convert sigma defaultdict to dict

        for sigma, sizes in sig_dict.iteritems():
            sigma_count = ngrams['doc'][doc_id][0][sigma]

            for n, values in sizes.iteritems():
                sizes[n] = sum([v for _, v in values]) / sigma_count

        informativity['doc'][doc_id] = sig_dict

    # calculate corpus-level informativity
    for sigma, sizes in informativity['corpus'].iteritems():
        sigma_count = ngrams['corpus'][0][sigma]

        for n, values in sizes.iteritems():
            sizes[n] = sum([v for _, v in values]) / sigma_count

    # convert sigma defaultdict to dict
    informativity['corpus'] = dict(informativity['corpus'])

    # pickle posteriors to file
    with open(INFORMATIVITY_FILE % N, 'wb') as f:
        pickle.dump(informativity, f, protocol=pickle.HIGHEST_PROTOCOL)

def create_index(number):
    create_stop()
    file = open('stop.pkl', 'rb')
    stop = pickle.load(file)
    file.close()
    #inverse=defaultdict(lambda : defaultdict(int))
    inverse = {}
    col = {}
    for i in range(0, 3):
        doc = 'd' + str(i)
        col[doc] = defaultdict(int)
        for word in preprocess(i):
            if word not in stop:
                # membership test uses the dictionary's hashing instead of .keys()
                col[doc][word] += 1
                if word in inverse:
                    if doc not in inverse[word]:
                        inverse[word][doc] += 1
                else:
                    inverse[word] = defaultdict(int)
                    inverse[word][doc] += 1
    file = open('structure.pkl', 'wb')
    pickle.dump(col, file)
    file.close()
    return col

def saveData(data):
    # timestr = time.strftime("%Y%m%d-%H%M%S")
    name = raw_input('Save file as: ')
    filename = "emg_" + name
    with open("emg-data/" + filename + ".pkl", 'wb') as fp:
        pickle.dump(data, fp)
    print 'File saved as:', filename

def runParallel(inmodel):
    print 'does not work properly. EXIT'
    sys.exit(0)
    ProBar = progressbar.ProgressBar(maxval=iterations).start()
    misfits = {}
    misfits['pMF'] = []; misfits['sMF'] = []
    misfits['ScsMF'] = []; misfits['ScssMF'] = []
    loadmod = cake.load_model(inmodel)

    for latindx, lat in enumerate(_lats):
        for lonindx, lon in enumerate(_lons):
            for zindex, z in enumerate(_depths):
                #iteration+=1
                # Start process with one event (depth), and one model:
                eve = model.Event(lat, lon, str_to_time("2010-04-11 22:08:15.500"),
                                  "Spain_Durcal", z, 6.3)
                [ttpdiff, ttsdiff, ttScsdiff, ttScssdiff] = depthfinder.startup(loadmod, eve, maxdist)
                pMF, sMF, ScsMF, ScssMF = map(lambda x: calculateMisfit(x, maxdist),
                                              [ttpdiff, ttsdiff, ttScsdiff, ttScssdiff])
                resultArray[latindx][lonindx][zindex] = [pMF, sMF, ScsMF, ScssMF]
                # update progressbar
                ProBar.update(iteration)
                identifierstring = inmodel + '.%s.%s.%s' % (lat, lon, z)
                results[identifierstring] = misfits

    try:
        output = open('results.p', 'w')
        pickle.dump(results, output)
    finally:
        output.close()

def getDates():
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    import datetime

    dateList = []
    dateHistFile = "dateList.d"
    try:
        dateList = pickle.load(open(dateHistFile, "rb"))
    except IOError:
        #Change this to the min date instead of a fixed date
        #But then we need to consider if the holiday dates are in the same range
        dateList = map(okDate, [
            datetime.datetime(2012, 12, 31, 0, 0) + datetime.timedelta(days=x)
            for x in range(0, 365)
        ])
        dateList = [dateVal for dateVal in dateList if dateVal is not None]
        #If this file is being run or loaded, then we need to recreate the pickled dateList
        pickle.dump(dateList, open(dateHistFile, "wb"))
    return dateList

def _init_unicode():
    """ Prepare unicode property tables and key pattern """
    global _loaded
    global _unicode_properties
    global _unicode_key_pattern

    if _use_cache is not None:
        props = join(_use_cache, "%s_unicode_properties.cache" % _cache_prefix)
        if (not exists(join(_use_cache, "%s_unicode_properties.cache" % _cache_prefix))):
            _unicode_properties = _build_unicode_property_table((0x0000, 0x10FFFF))
            _unicode_key_pattern = _build_unicode_key_pattern()

            try:
                with open(props, 'wb') as f:
                    pickle.dump(_unicode_key_pattern, f)
                    pickle.dump(_unicode_properties, f)
            except:
                if exists(props):
                    unlink(props)
        else:
            try:
                with open(props, 'rb') as f:
                    _unicode_key_pattern = pickle.load(f)
                    _unicode_properties = pickle.load(f)
            except:
                if exists(props):
                    unlink(props)
                _unicode_properties = _build_unicode_property_table((0x0000, 0x10FFFF))
                _unicode_key_pattern = _build_unicode_key_pattern()
    else:
        _unicode_properties = _build_unicode_property_table((0x0000, 0x10FFFF))
        _unicode_key_pattern = _build_unicode_key_pattern()

    _loaded = True

def _init_unicode():
    """ Prepare unicode property tables and key pattern. """
    global _loaded
    global _unicode_properties
    global _unicode_key_pattern

    if _use_cache is not None:
        props = join(_use_cache, "%s_unicode_properties.cache" % _cache_prefix)
        if (not exists(join(_use_cache, "%s_unicode_properties.cache" % _cache_prefix))):
            _unicode_properties = _build_unicode_property_table((0x0000, 0x10FFFF))
            _unicode_key_pattern = _build_unicode_key_pattern()

            try:
                with open(props, 'wb') as f:
                    pickle.dump(_unicode_key_pattern, f)
                    pickle.dump(_unicode_properties, f)
            except Exception:
                if exists(props):
                    unlink(props)
        else:
            try:
                with open(props, 'rb') as f:
                    _unicode_key_pattern = pickle.load(f)
                    _unicode_properties = pickle.load(f)
            except Exception:
                if exists(props):
                    unlink(props)
                _unicode_properties = _build_unicode_property_table((0x0000, 0x10FFFF))
                _unicode_key_pattern = _build_unicode_key_pattern()
    else:
        _unicode_properties = _build_unicode_property_table((0x0000, 0x10FFFF))
        _unicode_key_pattern = _build_unicode_key_pattern()

    _loaded = True

def test_pickle(obj):
    """Test if an object can successfully be pickled and loaded again
    Returns True if succeeds
            False if fails
    """
    import pickle
    # sys.setrecursionlimit(10000)
    path = 'test.p.tmp'
    if os.path.isfile(path):
        os.remove(path)  # remove temp file
    try:
        with open(path, 'wb') as f:
            pickle.dump(obj, f)
        print('Pickled object')
        with open(path, 'rb') as f:
            out = pickle.load(f)
        print('Loaded object')
    except Exception as e:
        print('{}'.format(e))
        return False
    if os.path.isfile(path):
        print('Pickled file size: {:g} Bytes'.format(os.path.getsize(path)))
        os.remove(path)  # remove temp file
    import pdb; pdb.set_trace()
    print('In:\n{}\nOut:\n{}'.format(out, obj))
    if not isinstance(obj, dict):
        out = out.__dict__
        obj = obj.__dict__
    if compare_dict(out, obj):
        return True
    else:
        return False

def save(self, filename):
    """ Save to a zipped pickle file.

    The resulting file can be used to initialize Engine object using
    `Engine.load(filename)`.

    Parameters
    ----------
    filename : str
    """
    dat = {
        'init_args': self._init_args,
        'rng_state': {
            'np': np.random.get_state(),
            'py': random.getstate()
        },
        'cls_attrs': {
            'models': self._models,
            'n_models': self._n_models,
            'diagnostic_tables': self._diagnostic_tables,
            'converters': self._converters
        }
    }

    with open(filename, 'wb') as f:
        pkl.dump(dat, f)

def save(self, file_name):
    """
    Save the buffer in a file.

    @param file_name
    """
    with open(file_name, 'wb') as f:
        pickle.dump(self.buffer, f, protocol=pickle.HIGHEST_PROTOCOL)

def create_index(self, number):
    documents = self.collection_processing(r"cacm\cacm.all")
    stop = set(stopwords.words('english'))
    inverse = {}
    col = {}
    for i in range(0, number):
        doc = re.sub(r"[\/()&~£{}%_:+*\"\]\[,.;@#-?!&$«»\n]|[1-9]+\ *", " ",
                     documents[i][0]) + str(i + 1)
        col[doc] = defaultdict(int)
        col[doc]['max'] = 0
        for word in self.preprocess(i, documents, 1):
            if word not in stop:
                # membership test uses the dictionary's hashing instead of .keys()
                col[doc][word] += 1
                if col[doc][word] > col[doc]['max']:
                    col[doc]['max'] = col[doc][word]
                if word in inverse:
                    if doc not in inverse[word]:
                        inverse[word][doc] += 1
                else:
                    inverse[word] = defaultdict(int)
                    inverse[word][doc] += 1
    file = open('inversed.pkl', 'wb')
    pickle.dump(inverse, file)
    file.close()
    file = open('index.pkl', 'wb')
    pickle.dump(col, file)
    file.close()
    return col

def start_slices(self):
    """
    Second step: start previously conflictive (non-standard) slices at FlowVisor.
    """
    errors = []
    slice_ids = []
    # If 'conflictive_slice_ids' file exists, do the following.
    # Otherwise warn and skip.
    try:
        f = open("%s/conflictive_slice_ids" % path, "r")
        ids = pickle.load(f)
        f.close()
        os.remove("%s/conflictive_slice_ids" % path)
        slices = filter_own_slices(ids)
        for (counter, slice) in enumerate(slices):
            aggs = slice.aggregates.filter(leaf_name="OpenFlowAggregate")
            slice_id = str(settings.SITE_DOMAIN) + "_" + str(slice.id)
            slice_ids.append({
                "id": slice_id,
                "keywords": ids[counter]['keywords']
            })
            for agg in aggs:
                try:
                    print "Starting", slice_id, "at aggregate", str(agg)
                    time.sleep(FLOWVISOR_SLEEP_TIME)
                    with Timeout(TIMEOUT):
                        agg.as_leaf_class().client.proxy.create_slice(
                            slice_id, slice.project.name,
                            slice.project.description, slice.name,
                            slice.description,
                            slice.openflowsliceinfo.controller_url,
                            slice.owner.email,
                            slice.openflowsliceinfo.password,
                            agg.as_leaf_class()._get_slivers(slice))
                    # Starting slice at Expedient
                    slice.started = True
                    slice.save()
                except Exception as e:
                    message = """Could not fix the naming in Aggregate Manager %s for slice with the following details:
name:\t\t %s
FlowVisor name:\t\t %s
The cause of the error is: %s. Please try to fix it manually""" % (
                        str(agg.as_leaf_class()), slice.name, slice_id, e)
                    send_mail(
                        "OCF: error while standardizing Flowvisor slices",
                        message, "*****@*****.**",
                        [settings.ROOT_EMAIL])
                    errors.append(message)
        # Save the slice IDs to grant flowspace regardless of other errors
        f = open("%s/slice_ids_to_grant_fs" % path, "w")
        pickle.dump(slice_ids, f)
        f.close()
        if errors:
            return "\033[93mFailure while starting previously non-standard slices at FlowVisor: %s\033[0m" % str(errors)
        else:
            return "\033[92mSuccessfully started previously non-standard slices at FlowVisor\033[0m"
    except Exception as e:
        print e
        return "\033[93mCould not access file with slice IDs. Skipping...\033[0m\n"

def getDates():
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    import datetime

    dateList = []
    dateHistFile = "dateList.d"
    try:
        dateList = pickle.load(open(dateHistFile, "rb"))
    except IOError:
        #Change this to the min date instead of a fixed date
        #But then we need to consider if the holiday dates are in the same range
        dateList = map(okDate, [
            datetime.datetime(2012, 12, 31, 0, 0) + datetime.timedelta(days=x)
            for x in range(0, 365)
        ])
        dateList = [dateVal for dateVal in dateList if dateVal is not None]
        #If this file is being run or loaded, then we need to recreate the pickled dateList
        pickle.dump(dateList, open(dateHistFile, "wb"))
    return dateList

def runSerial(models=None):
    '''
    Execute serial processing (1 CPU).

    :param models: list of models to investigate;
                   if no models are given, all models will be investigated.
    '''
    if models is None:
        models2use = _models
    else:
        models2use = [models]

    iteration = 0
    iterations = len(models2use) * len(_depths) * len(_lats) * len(_lons)
    sys.stdout.write('calculating misfits... ')
    ProBar = progressbar.ProgressBar(maxval=iterations).start()
    # instantiate result array as numpy nd array:
    resultArray = np.ndarray(shape=(len(_lats), len(_lons), len(_depths), 4))

    for mod in models2use:
        misfits = {}
        misfits['pMF'] = []; misfits['sMF'] = []
        misfits['ScsMF'] = []; misfits['ScssMF'] = []
        loadmod = cake.load_model(mod)

        for latindx, lat in enumerate(_lats):
            for lonindx, lon in enumerate(_lons):
                for zindex, z in enumerate(_depths):
                    iteration += 1
                    eve = model.Event(lat, lon, str_to_time("2010-04-11 22:08:15.500"),
                                      "Spain_Durcal", z, 6.3)
                    [ttpdiff, ttsdiff, ttScsdiff, ttScssdiff] = depthfinder.startup(loadmod, eve, maxdist)
                    [pMF, sMF, ScsMF, ScssMF] = map(lambda x: calculateMisfit(x, maxdist),
                                                    [ttpdiff, ttsdiff, ttScsdiff, ttScssdiff])
                    # update progressbar
                    ProBar.update(iteration)
                    # write data to numpy array:
                    resultArray[latindx][lonindx][zindex] = [pMF, sMF, ScsMF, ScssMF]

        results[mod] = resultArray
        depthfinder.storeStationBox()

    # finish progressbar:
    ProBar.finish()
    # write dict to pickled data:
    try:
        output = open('numpy_results.p', 'w')
        pickle.dump(results, output)
    finally:
        output.close()
    # write used stations file:
    depthfinder._myStationBox.writeUsedStationsFile()

def storeStationBox(self):
    '''
    Write dict to pickled data:
    '''
    try:
        picklecards = open('stationBox.p', 'w')
        pickle.dump(self, picklecards)
    finally:
        picklecards.close()

def save_temps(filename):
    # read the existing log, append the new readings, then write it back
    f = open(filename, 'r')
    data = cpickle.load(f)
    f.close()
    for i in range(1, 9):
        temp = read_temp(i)
        now = datetime.datetime.now()
        data[now] = [i, temp]
        time.sleep(0.5)
    f = open(filename, 'w')
    cpickle.dump(data, f)
    f.close()

def writeToFile(self, filename):
    """ Save all contacts in the model to a file. """
    try:
        with open(filename, "wb") as f:
            pickle.dump(self.tableModel.addresses, f)
    except IOError:
        QMessageBox.information(self, "Unable to open file",
                                "Unable to open file: %s" % filename)

def create_stop():
    stop = {}
    file = open(r'C:\Users\ASUS X541U\Documents\TP RI\stopwords_fr.txt', 'r', encoding='utf-8')
    for f in file.readlines():
        stop[f.replace('\n', '')] = 1
    file.close()
    file = open('stop.pkl', 'wb')
    pickle.dump(stop, file)
    file.close()

def save_project_object(proj_obj, date=pmh.get_current_date):
    '''Saves project object into python pickle format'''
    # Params:
    #   proj_obj (object): class initialized for project
    #   date (datetime obj): python datetime object specifying date
    #                        to include in the save file name
    date_str = date.strftime('_%Y%m%d_%H:%M')
    pkl_file_name = proj_obj.__name__ + date_str + '.pkl'
    with open(pkl_file_name, 'wb') as pf:
        pickle.dump(proj_obj, pf)
    return

def tfidf_features(X_train, X_test, vectorizer_path):
    tfidf_vectorizer = TfidfVectorizer(min_df=5, max_df=0.9,
                                       ngram_range=(1, 2),
                                       token_pattern=r'(\S+)')
    X_train = tfidf_vectorizer.fit_transform(X_train)
    X_test = tfidf_vectorizer.transform(X_test)

    with open(vectorizer_path, 'wb') as vectorizer_file:
        pickle.dump(tfidf_vectorizer, vectorizer_file)

    return X_train, X_test

def saveObj(dataFile=None, dataObj={}):
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    try:
        pickle.dump(dataObj, open(dataFile, "wb"))
    except IOError:
        return -1
    return 1

def add_user(username=None, password=None, **_):
    if password is None:
        password = getpass()
    user_uuid = uuid1()
    device = DeviceInfoFactory.prompt_device()
    u = AutoSignUser(user_uuid, from_username_password(username, password, device))
    makedirs(path.join(*base_path))
    with open(path.join(*base_path, f'{user_uuid}.userdump.pickle'), 'wb') as p:
        P.dump(u, p)
    return 0

def finalize(self, result):
    for k, v in self._known_hashes.items():
        self._hashes.setdefault(k, v)
    for k, v in self._known_graph.items():
        self._graph.setdefault(k, v)
    for m in self._failed_test_modules:
        log.debug('Module failed: %s' % (m, ))
        self._hashes.pop(m, None)
    with open(self.hash_file, 'w') as f:
        dump({'hashes': self._hashes, 'graph': self._graph}, f)

def finalize(self, result):
    for k, v in self._known_hashes.iteritems():
        self._hashes.setdefault(k, v)
    for k, v in self._known_graph.iteritems():
        self._graph.setdefault(k, v)
    for m in self._failed_test_modules:
        log.debug('Module failed: %s' % (m,))
        self._hashes.pop(m, None)
    f = open(self.hash_file, 'w')
    dump({'hashes': self._hashes, 'graph': self._graph}, f)
    f.close()

def _get_ngrams(N):
    print 'Counting ngrams...'

    # create ngrams dictionary
    N_dict = lambda: {i: defaultdict(float) for i in xrange(-1, N)}
    ngrams = {'doc': {}, 'corpus': N_dict()}

    # set corpus-level total (excluding punctuation)
    corpus_total = Token.query.filter_by(punctuation=False).count()
    ngrams['corpus'][-1][''] = corpus_total

    for d in Doc.query.all():
        # add doc-level ngrams dictionary
        ngrams['doc'][d.id] = N_dict()

        # set doc-level total (excluding punctuation)
        doc_total = sum(s.get_token_count() for s in d.sentences)
        ngrams['doc'][d.id][-1][''] = doc_total

        for s in d.sentences:
            # impose sentence-initial marker
            # (Note that there is no need for a sentence-final marker, as </s>
            # will never appear as context, nor will we ever calculate the
            # informativity of </s>.)
            sent = ['<s>'] + [t.word.lower() for t in s.tokens]

            # extract ngram counts
            for i in xrange(len(sent)):
                tok = s.tokens[i - 1] if i else None

                for n in range(N):
                    if i >= n:
                        ngram = ' '.join([sent[t] for t in xrange(i - n, i + 1)])
                        ngrams['doc'][d.id][n][ngram] += 1  # doc-level
                        ngrams['corpus'][n][ngram] += 1  # corpus-level

                        # store each ngram string
                        try:
                            setattr(tok, 'gram_%s' % str(n + 1), ngram)
                        except AttributeError:
                            continue

    db.session.commit()

    # pickle ngram counts to file
    with open(NGRAM_FILE % N, 'wb') as f:
        pickle.dump(ngrams, f, protocol=pickle.HIGHEST_PROTOCOL)

    return ngrams

def run_job(f, *args, **kwargs):
    '''
    Internal function, runs the function in a remote process.
    Uses fork() to perform it.

    Availability: Unix
    '''
    out_file = tempfile.mkstemp()
    os.close(out_file[0])
    pid = os.fork()
    if pid != 0:
        # parent: wait for the child
        pid, status = os.waitpid(pid, 0)
        # read output file
        #print('read from', os.getpid(), ':', out_file[1])
        if os.stat(out_file[1]).st_size == 0:
            # child did not write anything
            os.unlink(out_file[1])
            raise OSError('child did not output anything')
        if status != 0:
            os.unlink(out_file[1])
            raise RuntimeError('subprocess error: %d' % status)
        result = pickle.load(open(out_file[1], 'rb'))
        os.unlink(out_file[1])
        # traceback objects cannot be pickled...
        #if isinstance(result, tuple) and len(result) == 3 \
            #and isinstance(result[1], Exception):
            ## result is an exception with call stack: reraise it
            #raise result[0], result[1], result[2]
        if isinstance(result, Exception):
            raise result
        return result

    # child process
    try:
        try:
            #print('exec in', os.getpid(), ':', f, args, kwargs)
            result = f(*args, **kwargs)
            #print('OK')
        except Exception as e:
            # traceback objects cannot be pickled...
            #result = (type(e), e, sys.exc_info()[2])
            result = e
        #print('write:', out_file[1], ':', result)
        try:
            pickle.dump(result, open(out_file[1], 'wb'), protocol=2)
        except Exception as e:
            print('pickle failed:', e, '\nfor object:', type(result))
    finally:
        # sys.exit() is not enough
        os._exit(0)

def save_pyeq(equilibrium, filename):
    """
    Simple wrapper around pickle to save equilibria as .pyeq files
    """
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    if filename.split('.')[-1] != 'pyeq':
        filename += '.pyeq'

    file = open(filename, 'wb')
    try:
        pickle.dump(equilibrium, file)
    except:
        raise IOError('Error: unable to pickle data to file --> ' + filename)

def save_iteration(self, X, Y, new_x):
    """
    Saves an iteration.

    :param X: Data for the model (including the new observation)
    :param Y: Data for the model (including the new observation)
    :param new_x: the new observation
    """
    max_iteration = self._get_last_iteration_number()
    iteration_folder = self.save_dir + "/%03d" % (max_iteration + 1, )
    #pickle.dump(self, open(iteration_folder+"/bayesian_opt.pickle", "w"))
    os.makedirs(iteration_folder)
    if hasattr(self.acquisition_fkt, "_get_most_probable_minimum") and not self.model_untrained:
        pickle.dump([new_x, X, Y, self.acquisition_fkt._get_most_probable_minimum()[0]],
                    open(iteration_folder + "/observations.pickle", "w"))
    else:
        pickle.dump([new_x, X, Y, self.model.getCurrentBestX()],
                    open(iteration_folder + "/observations.pickle", "w"))

def save_luma_map(force=False):
    now = int(time.time())
    global LAST_SAVE
    if force or not LAST_SAVE or now - LAST_SAVE > SAVE_INTERVAL:
        try:
            with open(LUMA_FILE, "wb") as f:
                pickle.dump(LUMA_MAP, f)
            LAST_SAVE = now
        except Exception, e:
            print "WARNING: NOT SAVING LUMA MAP", e

        try:
            with open(CHANGES_FILE, "wb") as f:
                pickle.dump(LUMA_OBS, f)
        except Exception, e:
            print "WARNING: NOT SAVING LUMA MAP", e

def inverse_weight_shortest_path(G, weight_name='weight', dir='./', filename=' '):
    if G.is_directed():
        T = nx.DiGraph()
    else:
        T = nx.Graph()
    T.add_nodes_from(G.nodes(data=True))
    T.add_edges_from(G.edges())
    for e in G.edges(data=True):
        T[e[0]][e[1]][weight_name] = float(1 / float(G[e[0]][e[1]][weight_name]))
    distances = nx.floyd_warshall(T, weight=weight_name)
    if filename != ' ':
        nx.write_gexf(T, dir + filename.split('.')[-2] + '_inverse_edge_weight.gexf')
        pickle.dump(distances, open(dir + filename + '_inverse_weight_shortest_path.pck', 'w'))
        print 'Distance file saved to ' + dir + filename + '_inverse_weight_shortest_path.pck'
    return distances

def weighted_index(self, number):
    index = get_index()
    inverse = get_inverse()
    weighted_index = {}
    for word in inverse:
        weighted_index[word] = defaultdict(int)
        for doc in inverse[word]:
            freq = inverse[word][doc]
            if freq == 0:
                weighted_index[word][doc] = 0
            else:
                max_doc = index[doc]['max']  # max term frequency in the doc
                doc_number = len(inverse[word])  # number of docs containing word
                weighted_index[word][doc] += float(freq) / (float(max_doc) * log10(number / doc_number) + 1.0)
                # weight(ti, dj) = (freq(ti, dj) / Max(freq(t, dj))) * Log((N / ni) + 1)
    print(weighted_index)
    file = open('weighted.pkl', 'wb')
    pickle.dump(weighted_index, file)
    file.close()

def inverse_weight_shortest_path(G, weight_name='weight', dir='./', filename=' '):
    if G.is_directed():
        T = nx.DiGraph()
    else:
        T = nx.Graph()
    T.add_nodes_from(G.nodes(data=True))
    T.add_edges_from(G.edges())
    for e in G.edges(data=True):
        T[e[0]][e[1]][weight_name] = float(1 / float(G[e[0]][e[1]][weight_name]))
    distances = nx.floyd_warshall(T, weight=weight_name)
    if filename != ' ':
        nx.write_gexf(T, dir + filename.split('.')[-2] + '_inverse_edge_weight.gexf')
        pickle.dump(distances, open(dir + filename + '_inverse_weight_shortest_path.pck', 'w'))
        print('Distance file saved to ' + dir + filename + '_inverse_weight_shortest_path.pck')
    return distances

def start_slices(self):
    """
    Second step: start previously conflictive (non-standard) slices at FlowVisor.
    """
    errors = []
    slice_ids = []
    # If 'conflictive_slice_ids' file exists, do the following.
    # Otherwise warn and skip.
    try:
        f = open("%s/conflictive_slice_ids" % path, "r")
        ids = pickle.load(f)
        f.close()
        os.remove("%s/conflictive_slice_ids" % path)
        slices = filter_own_slices(ids)
        for (counter, slice) in enumerate(slices):
            aggs = slice.aggregates.filter(leaf_name="OpenFlowAggregate")
            slice_id = str(settings.SITE_DOMAIN) + "_" + str(slice.id)
            slice_ids.append({"id": slice_id, "keywords": ids[counter]['keywords']})
            for agg in aggs:
                try:
                    print "Starting", slice_id, "at aggregate", str(agg)
                    time.sleep(FLOWVISOR_SLEEP_TIME)
                    with Timeout(TIMEOUT):
                        agg.as_leaf_class().client.proxy.create_slice(slice_id,
                            slice.project.name, slice.project.description, slice.name,
                            slice.description, slice.openflowsliceinfo.controller_url,
                            slice.owner.email, slice.openflowsliceinfo.password,
                            agg.as_leaf_class()._get_slivers(slice))
                    # Starting slice at Expedient
                    slice.started = True
                    slice.save()
                except Exception as e:
                    message = """Could not fix the naming in Aggregate Manager %s for slice with the following details:
name:\t\t %s
FlowVisor name:\t\t %s
The cause of the error is: %s. Please try to fix it manually""" % (str(agg.as_leaf_class()), slice.name, slice_id, e)
                    send_mail("OCF: error while standardizing Flowvisor slices",
                              message, "*****@*****.**", [settings.ROOT_EMAIL])
                    errors.append(message)
        # Save the slice IDs to grant flowspace regardless of other errors
        f = open("%s/slice_ids_to_grant_fs" % path, "w")
        pickle.dump(slice_ids, f)
        f.close()
        if errors:
            return "\033[93mFailure while starting previously non-standard slices at FlowVisor: %s\033[0m" % str(errors)
        else:
            return "\033[92mSuccessfully started previously non-standard slices at FlowVisor\033[0m"
    except Exception as e:
        print e
        return "\033[93mCould not access file with slice IDs. Skipping...\033[0m\n"

def pickle_dump(obj, path, **kwargs):
    """Wrapper for pickle.dump, accepting multiple path formats (file, string, pathlib.Path).
    - Automatically appends .p if not present.
    - Uses cPickle when possible.
    - Automatically closes file objects."""
    if isinstance(path, Path):
        path = str(path)

    if isinstance(path, basestring):
        if path[-2:] != '.p':
            path += '.p'
        with open(path, 'wb') as f:
            pickle.dump(obj, f, **kwargs)
    elif isinstance(path, file):
        pickle.dump(obj, path, **kwargs)
        path.close()
    else:
        raise ValueError('Unexpected path format')

def getHistory():
    #add the
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    import datetime
    import time

    historyFile = "stockHist.d"
    try:
        histDict = pickle.load(open(historyFile, "rb"))
        # Need to check if the data files have changed and if so recreate
        # Check the max date in the database against the retrieved array
    except IOError:
        from sys import getsizeof
        from symbols import symbolList
        from processFuncs import writeLog

        histDict = {}
        recHistory = []
        dateSearch = datetime.datetime(2013, 1, 2, 0, 0)  #We know this is the starting date. Should probably use the tradingDates.py
        db = openData('stockData')
        useIndex(db, 'nasdaq', 'Date', 'Symbol')
        #db.nasdaq.ensure_index([('Date','Symbol')])
        writeLog("Creating Price History")

        #Symbol,Date,Open,High,Low,Close,Volume
        #prices={'AAPL':[('20130201',433.45,434.30,432.33,436.3,2030400),('20130202',433.55,434.50,432.33,436.3,2030400)]}
        #.strftime("%Y%m%d")
        for symbol in symbolList:
            recHistory = map(lambda x: (x['Date'].combine(x['Date'].date(), datetime.time(0, 0, 0)),
                                        x['Open'], x['High'], x['Low'], x['Close']),
                             db.nasdaq.find({'Symbol': symbol},
                                            {'Date': 1, 'Open': 1, 'High': 1, 'Low': 1, 'Close': 1, '_id': 0}).sort('Date', 1))
            #print recHistory
            #raw_input()
            histDict[symbol] = recHistory
            recHistory = []

        pickle.dump(histDict, open(historyFile, "wb"))

    return histDict

def getHistory():
    #add the
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    import datetime
    import time

    historyFile = "stockHist.d"
    try:
        histDict = pickle.load(open(historyFile, "rb"))
        # Need to check if the data files have changed and if so recreate
        # Check the max date in the database against the retrieved array
    except IOError:
        from sys import getsizeof
        from symbols import symbolList
        from processFuncs import writeLog

        histDict = {}
        recHistory = []
        dateSearch = datetime.datetime(2013, 1, 2, 0, 0)  #We know this is the starting date. Should probably use the tradingDates.py
        db = openData('stockData')
        useIndex(db, 'nasdaq', 'Date', 'Symbol')
        #db.nasdaq.ensure_index([('Date','Symbol')])
        writeLog("Creating Price History")

        #Symbol,Date,Open,High,Low,Close,Volume
        #prices={'AAPL':[('20130201',433.45,434.30,432.33,436.3,2030400),('20130202',433.55,434.50,432.33,436.3,2030400)]}
        #.strftime("%Y%m%d")
        for symbol in symbolList:
            recHistory = map(lambda x: (x['Date'].combine(x['Date'].date(), datetime.time(0, 0, 0)),
                                        x['Open'], x['High'], x['Low'], x['Close']),
                             db.nasdaq.find({'Symbol': symbol},
                                            {'Date': 1, 'Open': 1, 'High': 1, 'Low': 1, 'Close': 1, '_id': 0}).sort('Date', 1))
            #print recHistory
            #raw_input()
            histDict[symbol] = recHistory
            recHistory = []

        pickle.dump(histDict, open(historyFile, "wb"))

    return histDict

def _save_data(data, datafile, datafilefmt):
    '''
    Save the data file for future runs.
    '''
    if datafilefmt == 'json':
        with open(datafile, 'w', encoding='utf8') as f:
            # Need `ensure_ascii=False` to get Unicode
            f.write(json.dumps(data, indent=2, ensure_ascii=False))
            f.write('\n')
    elif datafilefmt == 'json.zip':
        with zipfile.ZipFile(datafile, 'w', compression=zipfile.ZIP_DEFLATED) as z:
            fname = os.path.split(datafile)[1].rsplit('.', 1)[0]
            t = json.dumps(data, indent=2, ensure_ascii=False) + '\n'
            z.writestr(fname, t.encode('utf8'))
    elif datafilefmt in ('pickle', 'pkl'):
        with open(datafile, 'wb') as f:
            pickle.dump(data, f)
    else:
        raise ValueError('Invalid data file format {0}'.format(datafilefmt))

def write_scheduler_stamps():
    """
    Dump the stamps to the scheduler_stamps_file using pickle module.
    The stamp file is written to a temporary file, and then moved to
    options.scheduler_stamps_file.
    """
    try:
        try:
            with open("%s.tmp" % options.scheduler_stamps_file, 'wb') as sock:
                pickle.dump(STAMPS, sock)
            os.rename("%s.tmp" % options.scheduler_stamps_file,
                      options.scheduler_stamps_file)
        except NameError as e:
            log(" write_scheduler_stamps NameError: line %d: %s"
                % (sys.exc_info()[2].tb_lineno, e))
    except IOError as e:
        log(" write_scheduler_stamps IOError: line %d: %s"
            % (sys.exc_info()[2].tb_lineno, e))

def inverse_index():
    start = timeit.timeit()
    inversed = {}
    index = create_index(370)
    for doc in index.keys():
        for word in index[doc].keys():
            inversed[word] = defaultdict(int)
            for d in index.keys():
                if d != doc:
                    inversed[word][d] += 0
            inversed[word][doc] += 1
    file = open('inverse.pkl', 'wb')
    pickle.dump(inversed, file)
    file.close()
    end = timeit.timeit()
    print(end - start)
    return inversed

def intensityMap(self, plot=False):
    path = self._gfile.split("/")[-1]
    nR = 100
    nphi = 200
    try:
        fieldlines = pickle.load(open("fieldline_store/" + path + "__fl.p", "r"))
    except:
        fieldlines = []
        Rstart = 1.37
        phistart = 160.0
        dR = 0.001
        dphi = 0.2
        for i in np.arange(nR):
            R = Rstart + i * dR
            fline = self.tracer.trace(R, 0.0, phistart * 2.0 * np.pi / 360.0, mxstep=10000, ds=0.01)
            for j in np.arange(nphi):
                fline.rotate_toroidal(dphi * 2.0 * np.pi / 360.0)
                line = self.projectFieldline(fline)
                indsR = np.where(np.array(fline.R) > 0.6)[0]
                inds = np.where(np.abs(fline.Z)[indsR] < 1.0)[0]
                fieldlines.append(line[inds])
        pickle.dump(fieldlines, open("fieldline_store/" + path + "__fl.p", "w"))

    frame = self.bgsub.apply(self._currentframeData)
    intensity = []
    for i in np.arange(len(fieldlines)):
        line = fieldlines[i]
        temp = []
        points = np.zeros(frame.shape)
        for j in np.arange(line.shape[0]):
            yind, xind = int(line[j, 0]), int(line[j, 1])
            if xind > 0 and xind < frame.shape[0] and yind > 0 and yind < frame.shape[1]:
                points[xind, yind] = 1.0
        intensity.append(np.sum(points * frame))

    intensity = np.array(intensity).reshape((nR, nphi))
    if plot:
        fig = plt.figure()
        levels = np.linspace(np.min(intensity), np.max(intensity), 200)
        plt.contourf(np.linspace(1.37, 1.47, 100), np.linspace(160, 200, 200), intensity.T, levels=levels)
        plt.show()
    else:
        return intensity

def saveSession(filepath, session, helper=None):
    """
    Saves dictionary session to file.

    :param filepath: path to save session file.
    :param session: dictionary
    :param helper: function to pre-process session
    :return: filename of saved session
    """
    # safely save session file
    # with os.fdopen(os.open(filepath, os.O_WRONLY | os.O_CREAT, 0600), 'wb') as logger:
    # http://stackoverflow.com/a/5624691/5288758
    with secure_open(filepath, 'wb') as logger:
        if helper:
            # save dictionary
            serializer.dump(helper(session), logger, serializer.HIGHEST_PROTOCOL)
        else:
            # save dictionary
            serializer.dump(session, logger, serializer.HIGHEST_PROTOCOL)
    return logger.name

def new_f(*args, **kwargs):
    compressed = ''
    if len(args) > 0:
        compressed = '_' + '_'.join([str(arg)[:10] for arg in args])
    if len(kwargs) > 0:
        compressed += '_' + '_'.join([(str(k) + str(v))[:10] for k, v in kwargs.items()])
    filename = '%s%s.pickle' % (f.__name__, compressed)
    if os.path.exists(filename):
        pickled = open(filename, 'rb')
        result = pickle.load(pickled)
        pickled.close()
    else:
        result = f(*args, **kwargs)
        pickled = open(filename, 'wb')
        pickle.dump(result, pickled)
        pickled.close()
    return result

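# Note: `new_f` above closes over a free variable `f`, so it is presumably the body of a
# caching decorator. A minimal sketch of such an enclosing decorator follows; the name
# `pickle_cache` and the use of functools.wraps are assumptions for illustration, not
# part of the original source.
import functools
import os
import pickle

def pickle_cache(f):
    """Cache f's return value on disk, keyed by a filename built from its arguments."""
    @functools.wraps(f)
    def new_f(*args, **kwargs):
        compressed = ''
        if args:
            compressed = '_' + '_'.join(str(arg)[:10] for arg in args)
        if kwargs:
            compressed += '_' + '_'.join((str(k) + str(v))[:10] for k, v in kwargs.items())
        filename = '%s%s.pickle' % (f.__name__, compressed)
        if os.path.exists(filename):
            # reuse the previously pickled result
            with open(filename, 'rb') as pickled:
                result = pickle.load(pickled)
        else:
            # compute once and pickle for subsequent calls
            result = f(*args, **kwargs)
            with open(filename, 'wb') as pickled:
                pickle.dump(result, pickled)
        return result
    return new_f
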
def new_f(*args, **kwargs):
    compressed = ''
    if len(args) > 0:
        compressed = '_' + '_'.join([str(arg)[:10] for arg in args])
    if len(kwargs) > 0:
        compressed += '_' + '_'.join([(str(k) + str(v))[:10] for k, v in kwargs.items()])
    logging.debug("Input File Name " + args[1])
    filename = '%s.pickle' % (args[1])
    logging.debug("Pickling file name %s " % filename)
    if os.path.exists(filename):
        pickled = open(filename, 'rb')
        result = pickle.load(pickled)
        pickled.close()
    else:
        result = f(*args, **kwargs)
        pickled = open(filename, 'wb')
        pickle.dump(result, pickled)
        pickled.close()
    return result

def save_iteration(self, X, Y, new_x):
    """
    Saves an iteration.

    :param X: Data for the model (including the new observation)
    :param Y: Data for the model (including the new observation)
    :param new_x: the new observation
    """
    max_iteration = self._get_last_iteration_number()
    iteration_folder = self.save_dir + "/%03d" % (max_iteration + 1, )
    #pickle.dump(self, open(iteration_folder+"/bayesian_opt.pickle", "w"))
    os.makedirs(iteration_folder)
    if hasattr(self.acquisition_fkt, "_get_most_probable_minimum") and not self.model_untrained:
        pickle.dump([new_x, X, Y, self.acquisition_fkt._get_most_probable_minimum()[0]],
                    open(iteration_folder + "/observations.pickle", "w"))
    else:
        pickle.dump([new_x, X, Y, self.model.getCurrentBestX()],
                    open(iteration_folder + "/observations.pickle", "w"))

def save(self, filename):
    """ Save to a zipped pickle file.

    The resulting file can be used to initialize Engine object using
    `Engine.load(filename)`.

    Parameters
    ----------
    filename : str
    """
    dat = {
        'init_args': self._init_args,
        'rng_state': {
            'np': np.random.get_state(),
            'py': random.getstate()},
        'cls_attrs': {
            'models': self._models,
            'n_models': self._n_models,
            'diagnostic_tables': self._diagnostic_tables,
            'converters': self._converters}}

    with open(filename, 'wb') as f:
        pkl.dump(dat, f)

print "train start!!" datas = [] for i in range(len(train_set[1])): temp_answer = [] answer = train_set[1][i] sentence = train_set[0][i] if answer == 0: temp_answer = [1, 0] else: temp_answer = [0, 1] datas.append((np.array(sentence, dtype='int32'), np.array(temp_answer, dtype=theano.config.floatX))) i = 0 while i < 20: start_time = time.time() accuracy, attentions = model.test(test_set[0], test_set[1]) print accuracy fattentions.write(attentions + '\n') print str(i + 1) + 'th epoch' model.train(datas=datas) end_time = time.time() print 'test time is ' + str(end_time - start_time) i += 1 accuray, attentions = model.test(test_set[0], test_set[1]) print accuracy fattentions.write(attentions + '\n') fattentions.close() pickle.dump(model, open('model/model_1.pkl'))
def save(self, fileobject=None):
    fileobject, close = file_open(fileobject or self._cachefile, 'wb')
    pickle.dump(self._symcache, fileobject)
    if close:
        fileobject.close()

dimensions = array.dimensions
attributes = array.attributes

if len(attributes) < 1:
    raise Exception("Inputs table must have at least one attribute.")
if len(dimensions) != 1:
    raise Exception("Inputs table must have exactly one dimension.")
timeseries_count = dimensions[0]["end"] - dimensions[0]["begin"]

attributes_data = {}
for attribute in range(len(attributes)):
    data = array.get_data(attribute)[...]
    attributes_data["%s" % attribute] = data

arrayset_inputs = dict(aid="inputs", array=0, dimensions=dimensions, attributes=attributes)
with open(os.path.join(dirname, "arrayset_inputs.pickle"), "wb") as arrayset_inputs_pickle:
    pickle.dump(arrayset_inputs, arrayset_inputs_pickle)
#connection.put_model_arrayset(mid, "inputs")
#connection.put_model_arrayset_array(mid, "inputs", 0, dimensions, attributes)

for attribute in range(len(attributes)):
    print("Storing input table attribute %s", attribute)
    data = array.get_data(attribute)[...]
    with open(os.path.join(dirname, "inputs_attributes_data_%s.pickle" % attribute), "wb") as attributes_file:
        pickle.dump(data, attributes_file)
    #connection.put_model_arrayset_data(mid, "inputs", "0/%s/..." % attribute, [data])

# Create a mapping from unique cluster names to timeseries attributes.
#connection.update_model(mid, state="running", started = datetime.datetime.utcnow().isoformat(), progress = 0.0, message="Mapping cluster names.")
clusters = collections.defaultdict(list)
timeseries_samples = numpy.zeros(shape=(timeseries_count))

if len(attributes) < 1:
    raise Exception("Inputs table must have at least one attribute.")
if len(dimensions) != 1:
    raise Exception("Inputs table must have exactly one dimension.")
timeseries_count = dimensions[0]["end"] - dimensions[0]["begin"]

"""
Save data to dictionary to be pickled. Slycat server will later
un-pickle the file and use the data for the following commands:

    put_model_arrayset(mid, "inputs")
    put_model_arrayset_array(mid, "inputs", 0, dimensions, attributes)
"""
arrayset_inputs = dict(aid="inputs", array=0, dimensions=dimensions, attributes=attributes)
with open(os.path.join(dirname, "arrayset_inputs.pickle"), "wb") as arrayset_inputs_pickle:
    pickle.dump(arrayset_inputs, arrayset_inputs_pickle)

"""
Fetch data for each of the attributes and pickle to disk. Slycat
server will later un-pickle the files and use the data for the
following command:

    put_model_arrayset_data(mid, "inputs", "0/%s/..." % attribute, [data])
"""
attributes_array = numpy.empty(shape=(len(attributes),), dtype=object)
for attribute in range(len(attributes)):
    print("Storing input table attribute %s", attribute)
    attributes_array[attribute] = array.get_data(attribute)[...]
with open(os.path.join(dirname, "inputs_attributes_data.pickle"), "wb") as attributes_file:
    pickle.dump(attributes_array, attributes_file)

# Create a mapping from unique cluster names to timeseries attributes.

def getRatios():
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    import datetime

    pairsFile = "ratioHistory.d"
    try:
        pairsDict = pickle.load(open(pairsFile, "rb"))
    except IOError:
        from sys import getsizeof
        from symbols import symbolList
        import numpy as np
        from processFuncs import writeLog
        from math import log

        #Then we need to create the file
        # Structure {Symbol:[12.12,13,13.2...],...]
        # In our program, we need to retrieve the history array of open,close values and then append the latest values for each
        # symbol
        # For testing, just retrieve the latest history
        from createHistory import getHistory

        # This is how we get the price ratio
        # This will get us the average price and then divide them
        # np.divide(np.average(histDict['AAPL']),np.average(histDict['AMZN']))
        # This will create an array that shows the history of the price ratios
        # np.divide(histDict['AAPL'],histDict['AMZN'])
        # Find the standard deviation of the price ratios
        # np.std(np.divide(histDict['AAPL'],histDict['AMZN']))
        pairsDict = {}
        pairsHistory = []
        symbol1Arry = []
        symbol2Arry = []
        savePair = 0
        dupeList = symbolList[:]
        stockHist = getHistory()  #This will be the entire history in a dictionary with key=symbol; the dates are based on order
        writeLog("Creating Pairs")

        for symbol in symbolList:
            # Don't need to do this since we already have a history function
            #symbolArry = stockHist[symbol]#map(lambda x: x['Close'],db.nasdaq.find({'Symbol':symbol},{'Close': 1, '_id':0 }))
            symbolArry = map(lambda x: (x[0], x[4]), stockHist[symbol])
            for symbol2 in dupeList:
                symbol2Arry = []
                if symbol2 == symbol:
                    savePair = 0
                    continue
                #symbol2Arry = stockHist[symbol2] #map(lambda x: x['Close'],db.nasdaq.find({'Symbol':symbol2},{'Close': 1, '_id':0 }))
                symbol2Arry = map(lambda x: (x[0], x[4]), stockHist[symbol2])
                minLen = min(len(symbolArry), len(symbol2Arry))
                if minLen > 0:
                    symbolArry = symbolArry[-minLen:]
                    symbol2Arry = symbol2Arry[-minLen:]
                    savePair = 1
                #if len(symbolArry)==len(symbol2Arry):
                #    savePair=1
                if savePair:
                    # Need to create another dictionary that stores the average and standard deviation
                    # Also, need to keep track of correlation
                    # For each day we need to divide
                    symbol2dict = dict(symbol2Arry)
                    doPairAppend = pairsHistory.append
                    for day in symbolArry:
                        dayDate = day[0]
                        dayClose = day[1]
                        if dayDate not in symbol2dict:
                            continue
                        day2Close = symbol2dict[dayDate]
                        doPairAppend((dayDate, log(round(np.divide(float(dayClose), float(day2Close)), 5))))
                    #print pairsHistory
                    #print symbol,symbol2
                    #raw_input()
                    pairsDict[("%s %s") % (symbol, symbol2)] = pairsHistory

                    if False:
                        for day in symbolArry:
                            dayDate = day[0]
                            day2Close = day[1]
                            #The date is stored in GMT time, so sometimes it has standard time and sometimes it's daylight
                            #time. Have to remove the GMT portion. Not sure if in import or when creating history.
                            if dayDate not in symbol2dict:
                                continue
                            dayClose = symbol2dict[dayDate]
                            pairsHistory.append((dayDate, round(np.divide(day2Close, dayClose), 5)))
                        pairsDict[("%s %s") % (symbol2, symbol)] = pairsHistory
                        #pairsHistory=np.divide(symbol2Arry,symbolArry,)
                        #pairsDict[("%s %s")%(symbol2,symbol)]=pairsHistory

                pairsHistory = []
                symbol1Arry = []
                symbol2Arry = []
                savePair = 0
            #if len(dupeList):
            #    dupeList.pop(0)
            #else:
            #    break

        pickle.dump(pairsDict, open(pairsFile, "wb"))

    return pairsDict

def init_log(filename):
    f = open(filename, 'w')
    data = {}
    cpickle.dump(data, f)
    f.close()

def __del__(self):
    """docstring for __del__ """
    with open(self.base, 'wb') as basefile:
        pickle.dump([self.poolsize, self.pool, self.ttl, self.tick], basefile)

def flush(self):
    """ Save store to file. """
    with open(self.filename, 'wb') as fp:
        pickle.dump(self.store, fp, -1)

train = pd.read_csv('../data/train.csv')
x_train = train.values[:, 0:-1]
y_train = np.array([[y] for y in train.values[:, -1]])

_, in_size = x_train.shape
_, out_size = y_train.shape

ds = SupervisedDataSet(in_size, out_size)
ds.setField('input', x_train)
ds.setField('target', y_train)

net = buildNetwork(in_size, h_size, out_size, bias=True)
trainer = BP(net, ds)

print("start training ...")
#mse = trainer.train()
#trainer.trainUntilConvergence(verbose=True, maxEpochs=4)
for n in xrange(epo):
    mse = trainer.train()
    rmse = sqrt(mse)
    print("RMSE = %8.3f epoch = %d" % (rmse, n))

print 'saving to pickle ...'
pickle.dump(net, open('net.pk', 'wb'))
print "done"
