Example #1
0
def _dump_best_effort(label, filename, build_payload):
    '''
        Pickle build_payload() into dump_path + label + filename + '.txt'.

        The file is opened first (errors from open() propagate); any failure
        while building or pickling the payload is reported on stdout and
        swallowed, so one failed dump does not prevent the following ones.
    '''
    target = label + filename + '.txt'
    with open(dump_path + target, 'wb') as f:
        try:
            cPickle.dump(build_payload(), f)
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still stop the run
            print("Could not dump " + target)


def _row_is_clean(values):
    '''
        Return True when no value in `values` is 0, '', '0' or NaN.
    '''
    for value in values:
        # We assume that 0, '' and so on mean not analyzed so we do not keep it
        if value == 0 or value == '' or value == '0':
            return False
        # NOTE(review): np.isnan raises TypeError for non-numeric values
        # (e.g. a non-empty string); kept as in the original - confirm inputs.
        if np.isnan(value):
            return False
    return True


def createDataDump(filename):
    '''
        From 2 vectors (one raw and one normalized) create several cPickle dumps

        Reads the module-level `all_desired_data` and
        `all_desired_data_normalized` and writes six pickle files under
        `dump_path` (the files contain pickles despite the '.txt' suffix):

            rawOutput<filename>.txt           the raw data
            rawOutputExtract<filename>.txt    the first items of the raw data
            preprocessedOutput<filename>.txt  preprocessing.scale(raw data)
            normOutput<filename>.txt          the normalized data
            normOutputExtract<filename>.txt   the first items of the normalized data
            normOutputClean<filename>.txt     normalized items whose element [1]
                                              holds no 0, '', '0' or NaN value

        The first five dumps are best effort: a failure prints
        "Could not dump <name>" and processing continues.  Errors while
        writing the last ("Clean") dump propagate to the caller.

        :param filename: suffix inserted into every output file name
    '''
    extract_size = 100

    # Save raw output
    _dump_best_effort('rawOutput', filename, lambda: all_desired_data)
    # Slicing (instead of indexing 0..99) also works when there are fewer
    # than extract_size items; list() keeps the payload a plain list of items.
    _dump_best_effort('rawOutputExtract', filename,
                      lambda: list(all_desired_data[:extract_size]))
    _dump_best_effort('preprocessedOutput', filename,
                      lambda: preprocessing.scale(all_desired_data))

    # Save normalized output
    _dump_best_effort('normOutput', filename,
                      lambda: all_desired_data_normalized)
    _dump_best_effort('normOutputExtract', filename,
                      lambda: list(all_desired_data_normalized[:extract_size]))

    with open(dump_path + 'normOutputClean' + filename + '.txt', 'wb') as f:
        clean = [row for row in all_desired_data_normalized
                 if _row_is_clean(row[1])]
        cPickle.dump(clean, f)
Example #2
0
    def get_model(self, fname, part, n=0):
        """Load a pickled n-gram component from ``fname`` into this object.

        NOTE: to prevent misidentification of pickled components, the n-gram model and n-gram frequencies
        should be pickled and retrieved separately.

        :param fname: name of the file containing the pickled item to be retrieved;
            ".pickle" is appended when the name does not already end with it
        :type fname: str
        :param part: what part of the model is being retrieved:
            grams:  self.model[n]
            model:  self.model
            freq:   self.ngramsFreqDict[n]
            freqs:  self.ngramsFreqDict
        :type part: str
        :param n: if part == grams or freq, picks out which slice is being unpickled
        :type n: int
        :returns: None
        :raises IOError: if the (suffixed) file does not exist
        :raises ValueError: if ``part`` is not one of the four choices, or if
            ``part`` is grams/freq and ``n`` is not greater than 0

        When a slice for ``n`` is already present the user is asked once
        whether to overwrite it; answering "n" keeps the existing slice and
        any other answer replaces it.
        """
        if not fname.endswith(".pickle"):
            fname += ".pickle"
        if not os.path.isfile(fname):
            raise IOError("no such file")

        # attribute that receives the whole unpickled object
        whole = {"model": "model", "freqs": "ngramsFreqDict"}
        # attribute holding the per-n dict, and the word used in the prompts
        sliced = {"grams": ("model", "model"),
                  "freq": ("ngramsFreqDict", "frequencies")}

        # SECURITY: unpickling can execute arbitrary code - only load files
        # that come from a trusted source.
        if part in whole:
            with open(fname, "rb") as f_in:
                setattr(self, whole[part], cPickle.load(f_in))
            return None

        if part not in sliced:
            raise ValueError("invalid choice. part must be grams, model, freq, or freqs")
        if n <= 0:
            raise ValueError("n must be greater than 0")

        attr, label = sliced[part]
        store = getattr(self, attr)
        if not store:
            # nothing loaded/trained yet: start a fresh per-n dict
            store = dict()
            setattr(self, attr, store)
        elif n in store:
            answer = raw_input("{0}-gram {1} found. Overwrite? (y/n)".format(n, label))
            if answer == "n":
                print("not overwriting {0}-gram {1}".format(n, label))
                return None

        with open(fname, "rb") as f_in:
            store[n] = cPickle.load(f_in)
        return None
Example #3
0
	def get_model(self,fname):
		"""Load a pickled collection from ``fname`` into ``self.sorted_d``.

		The unpickled items are sorted by their element at index 1, largest
		first (presumably (key, count) pairs - confirm against the writer).

		:param fname: file to read; ".pickle" is appended when the name does
			not already end with it
		:type fname: str
		:raises IOError: if the (suffixed) file does not exist
		"""
		import cPickle
		if not fname.endswith(".pickle"):
			fname += ".pickle"
		if not os.path.isfile(fname):
			raise IOError("no such file")
		# SECURITY: unpickling can execute arbitrary code - only load trusted files.
		with open(fname,"rb") as f_in:
			entries = cPickle.load(f_in)
		self.sorted_d = sorted(entries,key=lambda x: x[1],reverse=True)
Example #4
0
    def pickle_model(self, fname, cuke, n=0):
        """Pickle one component of the n-gram model to ``fname``.

        NOTE: to prevent misidentification of pickled components, the n-gram model and n-gram frequencies
        should be pickled and retrieved separately.

        :param fname: name of the file to be output to; ".pickle" is appended
            when the name does not already end with it
        :type fname: str
        :param cuke: what you want to pickle:
            grams:  self.model[n]
            model:  self.model
            freq:   self.ngramsFreqDict[n]
            freqs:  self.ngramsFreqDict
        :type cuke: str
        :param n: if cuke == grams or freq, picks out which slice to pickle
        :type n: int
        :returns: None
        :raises ValueError: if ``cuke`` is not one of the four choices, or if
            ``cuke`` is grams/freq and ``n`` is not greater than 0
        :raises IndexError: if ``cuke`` is grams/freq and there is no slice for ``n``

        The request is validated before the output file is touched, so an
        invalid call never truncates an existing file.  If the file already
        exists the user is asked whether to overwrite it; answering "n"
        aborts, any other answer overwrites.
        """
        if not fname.endswith(".pickle"):
            fname += ".pickle"

        # Pick the payload first: opening the file with "wb" truncates it.
        if cuke == "model":
            payload, what = self.model, "ngram model"
        elif cuke == "freqs":
            payload, what = self.ngramsFreqDict, "ngram frequencies"
        elif cuke == "grams" or cuke == "freq":
            if n <= 0:
                raise ValueError("n must be greater than 0")
            if cuke == "grams":
                source, what = self.model, "{0}-grams".format(n)
            else:
                source, what = self.ngramsFreqDict, "{0}-gram frequencies".format(n)
            if n not in source:
                raise IndexError("n has to be equal to a set of trained ngrams (1-{0})".format(len(source)))
            payload = source[n]
        else:
            raise ValueError("invalid choice. options are grams, model, freq, or freqs.")

        if os.path.isfile(fname):
            if raw_input("File exists! Overwrite? (y/n)") == "n":
                print("not overwriting file %s" % (str(fname)))
                return None

        with open(str(fname), "wb") as f_out:
            cPickle.dump(payload, f_out, cPickle.HIGHEST_PROTOCOL)
        print("saved {0} as {1}".format(what, fname))
        return None
Example #5
0
	def pickle_model(self,fname):
		"""Pickle ``self.model`` to ``fname`` using the highest pickle protocol.

		:param fname: output file; ".pickle" is appended when the name does
			not already end with it
		:type fname: str
		:returns: None

		If the file already exists the user is asked whether to overwrite
		it; answering "n" aborts, any other answer overwrites.
		"""
		import cPickle
		if not fname.endswith(".pickle"):
			fname += ".pickle"
		if os.path.isfile(fname):
			if raw_input("File exists! Overwrite? (y/n)") == "n":
				print("not overwriting file %s" %(str(fname)))
				return None
		with open(str(fname),"wb") as f_out:
			# cPickle.HIGHEST_PROTOCOL: only cPickle is imported in this method
			cPickle.dump(self.model,f_out,cPickle.HIGHEST_PROTOCOL)
		print("saved ngram model as {0}".format(fname))
Example #6
0
import cPickle
import numpy as np
import cv2
from sklearn.svm import LinearSVC


# Train a linear SVM on one CIFAR-10 batch and display the learned weight
# vector of the first class as an image.

# SECURITY: unpickling can execute arbitrary code - only load trusted files.
# Binary mode is required for pickle data.
with open('/home/divay/Documents/cs321n/cifar-10-batches-py/data_batch_1','rb') as f:
    dict1=cPickle.load(f)
x=dict1['data']
# the CIFAR-10 python batches store the targets under 'labels'
y=dict1['labels']


clf=LinearSVC()
clf.fit(x,y)
# fitted weights live in coef_, one row per class
w=clf.coef_

# NOTE(review): empirical display scaling taken from the original - confirm.
w=w*1000000*255/172
# NOTE(review): row 0 is class 0; in CIFAR-10 label 0 is "airplane" and
# "cat" is label 3 - confirm which class was meant.
cat=w[0]
# Each CIFAR-10 row is channel-major (1024 red, 1024 green, 1024 blue
# values), so reshape to (3,32,32) and move the channels last.
rs_w=np.reshape(cat,(3,32,32)).transpose(1,2,0)
# OpenCV displays images as BGR: reverse the channel axis and make the
# array contiguous before handing it to cv2.
rs_w=np.ascontiguousarray(rs_w[:,:,::-1])
rs_w=cv2.resize(rs_w,(320,320))
cv2.imshow('',rs_w)
# imshow only renders once the GUI event loop runs; wait for a key press.
cv2.waitKey(0)
Example #7
0
 def load(self,filename):
   """Restore this object's attributes from a pickle file.

   Rebinding ``self`` inside a method does not change the caller's object,
   so the attributes of the unpickled object are copied onto this
   instance instead.

   NOTE(review): assumes the file holds a pickled instance with a
   ``__dict__`` (e.g. one written with cPickle.dump(obj, file)) - confirm
   against the matching save routine.

   :param filename: path of the pickle file to read
   :returns: None
   """
   # straightforward (but somehow slow) (de)serialisation using cPickle
   # SECURITY: unpickling can execute arbitrary code - only load trusted files.
   with open(filename,'rb') as f:
     loaded = cPickle.load(f)
   self.__dict__.update(vars(loaded))