def createDataDump(filename):
    """Pickle the raw and normalized data vectors into several dump files.

    Six files are written, each at ``dump_path + <prefix> + filename + '.txt'``:

    * ``rawOutput``          -- ``all_desired_data`` as is
    * ``rawOutputExtract``   -- at most the first 100 raw entries
    * ``preprocessedOutput`` -- ``preprocessing.scale(all_desired_data)``
    * ``normOutput``         -- ``all_desired_data_normalized`` as is
    * ``normOutputExtract``  -- at most the first 100 normalized entries
    * ``normOutputClean``    -- normalized entries whose feature vector
      (element ``[1]`` of the entry) contains no 0, '', '0' or NaN

    The first five dumps are best effort: a failure is reported on stdout and
    the function carries on.  Errors while opening a file, or while building
    and writing the "clean" dump, propagate to the caller.

    :param filename: suffix inserted between the prefix and ``.txt``
    :type filename: str
    :returns: None
    """

    def _dump(prefix, build_payload):
        # The payload is built inside the ``try`` so that a failure while
        # computing it (e.g. scaling) is reported instead of aborting the
        # remaining dumps.
        name = prefix + filename + '.txt'
        with open(dump_path + name, 'wb') as f:
            try:
                cPickle.dump(build_payload(), f)
            except Exception as err:
                # Include the reason: silently swallowing it is what used to
                # hide a permanently failing dump.
                print("Could not dump " + name + ": " + str(err))

    def _is_analyzed(values):
        # We assume that 0, '' and so on mean "not analyzed", so entries
        # containing any of them (or NaN) are not kept.
        for value in values:
            if value == 0 or value == '' or value == '0':
                return False
            if np.isnan(value):
                return False
        return True

    # Save raw output
    _dump('rawOutput', lambda: all_desired_data)
    # Slicing (rather than indexing 0..99) also works with fewer than 100 entries.
    _dump('rawOutputExtract', lambda: list(all_desired_data[:100]))
    # cPickle is a module: the payload has to go through cPickle.dump().
    _dump('preprocessedOutput', lambda: preprocessing.scale(all_desired_data))

    # Save normalized output
    _dump('normOutput', lambda: all_desired_data_normalized)
    _dump('normOutputExtract', lambda: list(all_desired_data_normalized[:100]))

    with open(dump_path + 'normOutputClean' + filename + '.txt', 'wb') as f:
        extract = [entry for entry in all_desired_data_normalized
                   if _is_analyzed(entry[1])]
        cPickle.dump(extract, f)
def get_model(self, fname, part, n=0):
    """Load a pickled n-gram component from disk into this object.

    NOTE: to prevent misidentification of pickled components, the n-gram
    model and n-gram frequencies should be pickled and retrieved separately.

    :param fname: name of the file containing the pickled item to be
        retrieved (".pickle" is appended when the name does not end with it)
    :type fname: str
    :param part: what part of the model is being retrieved:
        grams: self.model[n]
        model: self.model
        freq: self.ngramsFreqDict[n]
        freqs: self.ngramsFreqDict
    :type part: str
    :param n: if part == grams or freq, picks out which slice is being
        unpickled
    :type n: int
    :returns: None (also when the user declines to overwrite a slice)
    :raises IOError: if the file does not exist
    :raises ValueError: if part is not one of the four choices, or if part
        is grams/freq and n == 0
    """
    if not fname.endswith(".pickle"):
        fname += ".pickle"
    if not os.path.isfile(fname):
        raise IOError("no such file")

    if part not in ("model", "freqs", "grams", "freq"):
        raise ValueError("invalid choice. part must be grams, model, freq, or freqs")

    if part in ("grams", "freq"):
        if n == 0:
            raise ValueError("n must be greater than 0")
        if part == "grams":
            existing, label = self.model, "model"
        else:
            existing, label = self.ngramsFreqDict, "frequencies"
        # Ask once, and only when a slice would actually be replaced;
        # answering "n" keeps the current slice.
        if existing and n in existing:
            answer = raw_input("{0}-gram {1} found. Overwrite? (y/n)".format(n, label))
            if answer == "n":
                print("not overwriting {0}-gram {1}".format(n, label))
                return None

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(fname, "rb") as f_in:
        loaded = cPickle.load(f_in)

    if part == "model":
        self.model = loaded
    elif part == "freqs":
        self.ngramsFreqDict = loaded
    elif part == "grams":
        if not self.model:
            self.model = dict()
        self.model[n] = loaded
    else:  # part == "freq"
        if not self.ngramsFreqDict:
            self.ngramsFreqDict = dict()
        self.ngramsFreqDict[n] = loaded
    return None
def get_model(self, fname):
    """Load a pickled collection and store it, sorted, in ``self.sorted_d``.

    The unpickled object is iterated and its items are sorted by their
    second element (``item[1]``) in descending order.

    :param fname: name of the pickle file (".pickle" is appended when the
        name does not end with it)
    :type fname: str
    :returns: None
    :raises IOError: if the file does not exist
    """
    try:
        import cPickle
    except ImportError:  # Python 3: cPickle was merged into pickle
        import pickle as cPickle

    if not fname.endswith(".pickle"):
        fname += ".pickle"
    if not os.path.isfile(fname):
        raise IOError("no such file")

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(fname, "rb") as f_in:
        loaded = cPickle.load(f_in)
    self.sorted_d = sorted(loaded, key=lambda x: x[1], reverse=True)
def pickle_model(self, fname, cuke, n=0):
    """Pickle one component of the n-gram model to a file.

    NOTE: to prevent misidentification of pickled components, the n-gram
    model and n-gram frequencies should be pickled and retrieved separately.

    :param fname: name of the file to be output to (".pickle" is appended
        when the name does not end with it)
    :type fname: str
    :param cuke: what you want to pickle:
        grams: self.model[n]
        model: self.model
        freq: self.ngramsFreqDict[n]
        freqs: self.ngramsFreqDict
    :type cuke: str
    :param n: if cuke == grams or freq, picks out which slice to pickle
    :type n: int
    :returns: None (also when the user declines to overwrite the file)
    :raises ValueError: if cuke is not one of the four choices, or if cuke
        is grams/freq and n == 0
    :raises IndexError: if cuke is grams/freq and there is no slice for n
    """
    if not fname.endswith(".pickle"):
        fname += ".pickle"

    # Resolve and validate the payload BEFORE touching the disk, so that an
    # invalid request can neither truncate an existing file nor leave an
    # empty one behind.
    choice = cuke
    if choice == "model":
        payload = self.model
        saved_msg = "saved ngram model as {0}".format(fname)
    elif choice == "freqs":
        payload = self.ngramsFreqDict
        saved_msg = "saved ngram frequencies as {0}".format(fname)
    elif choice in ("grams", "freq"):
        if n == 0:
            raise ValueError("n must be greater than 0")
        if choice == "grams":
            source = self.model
            saved_msg = "saved {0}-grams as {1}".format(n, fname)
        else:
            source = self.ngramsFreqDict
            saved_msg = "saved {0}-gram frequencies as {1}".format(n, fname)
        if n not in source:
            raise IndexError("n has to be equal to a set of trained ngrams (1-{0})".format(len(source)))
        payload = source[n]
    else:
        raise ValueError("invalid choice. options are grams, model, freq, or freqs.")

    if os.path.isfile(fname):
        if raw_input("File exists! Overwrite? (y/n)") == "n":
            print("not overwriting file %s" % (str(fname)))
            return None

    with open(str(fname), "wb") as f_out:
        cPickle.dump(payload, f_out, cPickle.HIGHEST_PROTOCOL)
    print(saved_msg)
    return None
def pickle_model(self, fname):
    """Pickle ``self.model`` to a file using the highest pickle protocol.

    If the target file already exists the user is asked whether to
    overwrite it; answering "n" leaves the file untouched.

    :param fname: name of the file to be output to (".pickle" is appended
        when the name does not end with it)
    :type fname: str
    :returns: None
    """
    try:
        import cPickle
    except ImportError:  # Python 3: cPickle was merged into pickle
        import pickle as cPickle

    if not fname.endswith(".pickle"):
        fname += ".pickle"
    if os.path.isfile(fname):
        if raw_input("File exists! Overwrite? (y/n)") == "n":
            print("not overwriting file %s" % (str(fname)))
            return None

    with open(str(fname), "wb") as f_out:
        cPickle.dump(self.model, f_out, cPickle.HIGHEST_PROTOCOL)
    print("saved ngram model as {0}".format(fname))
    return None
import cPickle

import cv2
import numpy as np
from sklearn.svm import LinearSVC

# Train a linear SVM on the first CIFAR-10 training batch and display the
# weight vector learned for one class as an image.

# The batch is a pickle, so it must be opened in binary mode.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('/home/divay/Documents/cs321n/cifar-10-batches-py/data_batch_1', 'rb') as f:
    dict1 = cPickle.load(f)

x = dict1['data']
y = dict1['labels']  # the batch dictionary stores its targets under 'labels'

clf = LinearSVC()
clf.fit(x, y)

w = clf.coef_  # fitted weights; one row per class
w = w * 1000000 * 255 / 172  # NOTE(review): empirical display scaling -- confirm the intent
# NOTE(review): row 0 belongs to the first class label; in the CIFAR-10 label
# order that is "airplane" and "cat" is index 3 -- confirm which was meant.
cat = w[0]

# Each CIFAR-10 row stores the 1024 red values, then green, then blue, so it
# is laid out as (channel, row, col); move the channels last for OpenCV.
rs_w = np.reshape(cat, (3, 32, 32)).transpose(1, 2, 0)
# OpenCV displays channels in BGR order, so flip the RGB planes.
rs_w = np.ascontiguousarray(rs_w[:, :, ::-1])
rs_w = cv2.resize(rs_w, (320, 320))
cv2.imshow('', rs_w)
# imshow only paints the window once the GUI event loop runs.
cv2.waitKey(0)
def load(self, filename):
    """Restore this object's attributes from a pickle file, in place.

    NOTE(review): assumes the file holds a pickled object whose state lives
    in its ``__dict__`` (e.g. an instance of this class written by a
    matching save routine) -- confirm against the code that writes the file.

    :param filename: path of the pickle file to read
    :type filename: str
    :returns: None
    :raises TypeError: if the unpickled object has no ``__dict__``
    """
    # straightforward (but somehow slow) (de)serialisation using cPickle
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(filename, 'rb') as f:
        loaded = cPickle.load(f)
    # Rebinding the local name ``self`` would not change the caller's object,
    # so copy the loaded state into the existing instance instead.
    self.__dict__.update(vars(loaded))