def suggest_moves(self, board):
    board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(
        board, board.color_to_play).astype(np.float32)
    Normalization.apply_featurewise_normalization_C(board_feature_planes)
    feed_dict = {
        self.feature_planes:
        board_feature_planes.reshape(1, self.model.N, self.model.N, self.model.Nfeat)
    }
    move_logits = self.sess.run(self.logits, feed_dict).ravel()  # ravel flattens to 1D
    # zero out illegal moves
    for x in xrange(self.model.N):
        for y in xrange(self.model.N):
            ind = self.model.N * x + y
            if not board.play_is_legal(x, y, board.color_to_play):
                move_logits[ind] = -1e99
    move_probs = softmax(move_logits, self.softmax_temp)
    sum_probs = np.sum(move_probs)
    if sum_probs == 0:
        return []  # no legal moves
    move_probs /= sum_probs  # re-normalize probabilities
    good_moves = []
    cum_prob = 0.0
    while cum_prob < self.threshold_prob:
        ind = np.argmax(move_probs)
        x, y = ind / self.model.N, ind % self.model.N
        good_moves.append((x, y))
        prob = move_probs[ind]
        cum_prob += prob
        move_probs[ind] = 0
    return good_moves
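# Note: suggest_moves (and pick_model_move below) call a softmax(logits, temperature)
# helper that is not shown on this page. A minimal sketch of what such a helper could
# look like -- an assumption, not the project's actual implementation -- using a
# temperature-scaled softmax with the usual max subtraction for numerical stability.
import numpy as np

def softmax(logits, temperature):
    # shift by the max logit so the exponentials cannot overflow
    scaled = (logits - np.max(logits)) / temperature
    exps = np.exp(scaled)
    return exps / np.sum(exps)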
def getInvertedIndexTokens(fileNum):
    invertedIndex = {}
    # load the original documents for tokenization and normalization later
    fileNum = '%0*d' % (3, fileNum)
    resourcepath = 'reuters/reut2-' + fileNum + '.sgm'
    file = open(resourcepath)
    soup = BeautifulSoup(file, 'html.parser')
    allReuters = soup.find_all('reuters')
    for reuters in allReuters:
        if reuters.body is not None:
            # tokenize the text inside the [body] tag in the files
            tokenslist = reuters.body.text.rsplit('reuters', 1)
            for e in tokenslist:
                tokens = ''.join(e).encode('utf8')
                tokens = Tokenization.tokenization(tokens)
                for token in tokens:
                    # normalization after getting the tokens
                    token = Normalization.cleanedTokens(token)
                    token = Normalization.caseFoldedTokens(token)
                    token = Normalization.cleanStopWords150(token)
                    token = Normalization.stemmedTokens(token)
                    # construct the inverted index for tokens
                    if token != '':
                        if invertedIndex.has_key(token):
                            if reuters['newid'] not in invertedIndex[token]:
                                invertedIndex[token].append(reuters['newid'])
                        else:
                            invertedIndex[token] = [reuters['newid']]
    return invertedIndex
def searchQueryUnion(query, dictionary):
    result = []
    IDresult = []
    postings = []
    DocID = []
    # Tokenize the query for later processing
    tokens = Tokenization.tokenization(query)
    # Normalize the tokens for later processing
    for index in range(len(tokens)):
        tokens[index] = Normalization.cleanStopWords150(tokens[index])
        tokens[index] = Normalization.cleanedTokens(tokens[index])
        tokens[index] = Normalization.caseFoldedTokens(tokens[index])
        tokens[index] = Normalization.stemmedTokens(tokens[index])
        if tokens[index] in dictionary:
            DocID = DocID + dictionary.get(tokens[index])
    if DocID != []:
        # change docID from string to int
        for index in range(len(DocID)):
            DocID[index] = DocID[index].split(',')
            DocID[index] = DocID[index][0].split()
            DocID[index] = map(int, DocID[index])
        # take the union of the DocIDs for the different tokens in the query
        DocID = [set(id) for id in DocID]
        #print DocID
        finalID = sorted(set.union(*DocID))
        return finalID
    else:
        finalID = DocID
        return finalID
def evaluate(self, board):
    board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(
        board, board.color_to_play).astype(np.float32)
    Normalization.apply_featurewise_normalization_C(board_feature_planes)
    feed_dict = {
        self.feature_planes:
        board_feature_planes.reshape(1, self.model.N, self.model.N, self.model.Nfeat)
    }
    score = np.asscalar(self.sess.run(self.score_op, feed_dict))
    return score
def purrify(query):
    # purrify the query
    normalizedTokens = []
    tokens = Tokenization.tokenization(query)
    for token in tokens:
        token = Normalization.cleanedTokens(token)
        token = Normalization.caseFoldedTokens(token)
        token = Normalization.cleanStopWords150(token)
        token = Normalization.stemmedTokens(token)
        if token != "":
            normalizedTokens.append(token)
    #print normalizedTokens
    return normalizedTokens
def normFeature(feat_dict, normalize, mean=None, std=None):
    # Do the feature normalization here
    if normalize == "MinMax":
        print("Using MinMax")
        for i in feat_key:
            feat_dict[i] = norm.normMinMax(np.asarray(feat_dict[i]))
    elif normalize == "MeanStd":
        print("Using MeanStd")
        for i in feat_key:
            feat_dict[i] = norm.normMeanStd(np.asarray(feat_dict[i]), mean[i], std[i])
    else:
        print("No proper normalization tool was selected")
        assert(False)
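# normFeature above delegates to norm.normMinMax and norm.normMeanStd, which are not
# shown on this page. A hedged sketch of what such helpers typically do (hypothetical
# implementations, not the project's actual code):
import numpy as np

def normMinMax(x):
    # rescale each value into [0, 1] using the array's own min and max
    return (x - x.min()) / (x.max() - x.min())

def normMeanStd(x, mean, std):
    # standardize with a precomputed mean and standard deviation
    return (x - mean) / std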
def get_position_eval(self):
    #assert self.model.Nfeat == 21
    #board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(self.board, self.board.color_to_play).astype(np.float32)
    #Normalization.apply_featurewise_normalization_C(board_feature_planes)
    assert self.model.Nfeat == 22
    board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures_komi(
        self.board, self.board.color_to_play, self.komi).astype(np.float32)
    Normalization.apply_featurewise_normalization_D(board_feature_planes)
    feature_batch = Symmetry.make_symmetry_batch(board_feature_planes)
    feed_dict = {self.feature_planes: feature_batch}
    probs_batch = self.sess.run(self.probs_op, feed_dict)
    prob = average_probs_over_symmetries(probs_batch)
    if self.board.color_to_play == Color.White:
        prob *= -1
    return prob
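# average_probs_over_symmetries, used by get_position_eval above, is not shown here.
# Since the evaluation head produces a single scalar per symmetry copy of the board,
# it is presumably just the mean over the batch of symmetry-transformed positions;
# a minimal sketch under that assumption:
import numpy as np

def average_probs_over_symmetries(probs_batch):
    # probs_batch holds one scalar evaluation per symmetry copy of the position
    return np.mean(probs_batch)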
def dataset_Leaf(self):
    """
    Create the data for the Foliage dataset.
    :return:
    """
    self.create_data_base()
    self.make_directories("other")
    print("Create id Text File ...")
    id_d = Make_Id_Species.Set_Id_Species(self.p + "train/", self.path_database["data"])
    id_d.set_list_id()
    id_d.set_id_dic()
    print("Create File ...")
    for directories_data in ["normalization", "augmentation"]:
        self.m_dir.make_directories(id_d.path_dict, self.path_database[directories_data])
    print("Normalize ...")
    norm_train = Normalization.Normalize(self.p + "train/", self.path_database["normalization"])
    norm_train.normalize_dimension_image()
    print("Augmentation ...")
    d_augm = Data_Augmentation.Data_Augmentation(self.path_database["normalization"],
                                                 self.path_database["augmentation"])
    d_augm.create_augmentation(False)
    print("Normalize Validation ...")
    shutil.rmtree(self.path_database["normalization"])
    os.makedirs(self.a + "normalization/")
    self.m_dir.make_directories(id_d.path_dict, self.path_database["normalization"])
    norm_test = Normalization.Normalize(self.p + "validation/", self.path_database["normalization"])
    norm_test.normalize_dimension_image()
    norm_test.reduce()
    m_file = Make_train_val_file()
    print("Create Text File ...")
    path_for_val_train = {
        "train": "augmentation",
        "validation": "normalization"
    }
    for i in path_for_val_train:
        m_file.make_file(self.path_database[path_for_val_train[i]],
                         self.path_database["data"] + i + ".txt",
                         id_d.path_dict)
    self.lmdb_build.set_lmdb(self.path_database, self.a, self.caffe_path + "build/tools")
    mean = Create_Mean()
    mean.make_mean(self.a, self.a + "lmdb/", self.caffe_path + "build/tools")
def chooseZscoreSchemeFromFiles(folder, beta_contents, covariance_entries, weight_db_logic):
    if len(beta_contents) == 0:
        raise Exceptions.ReportableException(
            "No snp's beta data found. Please check your beta files and/or command line arguments.")

    beta_content = beta_contents[0]
    beta_path = os.path.join(folder, beta_content)
    zscore_scheme = None
    normalization_scheme = None
    with gzip.open(beta_path) as content:
        header = content.readline().strip()
        # So. If beta_z is present, just go for "modified formula" with reference variance.
        # Any other option has to be specifically chosen.
        if "beta_z" in header:
            zscore_scheme = ZScoreCalculation.BETA_Z_SIGMA_REF
            normalization_scheme = Normalization.NONE
        elif "beta" in header and "sigma_l" in header:
            zscore_scheme = ZScoreCalculation.METAXCAN
            normalization_scheme = _chooseNormalization(header)
        elif "beta" in header and not "sigma_l" in header:
            zscore_scheme = ZScoreCalculation.METAXCAN_FROM_REFERENCE
            normalization_scheme = _chooseNormalization(header)
        else:
            raise Exception("Couldn't infer data from beta file header")

    logging.info("Chose zscore scheme '%s' and normalization '%s'", zscore_scheme, normalization_scheme)
    zscore_calculation = ZScoreCalculation.ZScoreScheme(zscore_scheme)
    normalization = Normalization.normalizationScheme(normalization_scheme, covariance_entries, weight_db_logic)
    return zscore_calculation, normalization
def pick_model_move(self, color):
    if self.model.Nfeat == 15:
        board_feature_planes = Features.make_feature_planes_stones_3liberties_4history_ko(self.board, color)
        Normalization.apply_featurewise_normalization_B(board_feature_planes)
    elif self.model.Nfeat == 21:
        board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(
            self.board, color).astype(np.float32)
        Normalization.apply_featurewise_normalization_C(board_feature_planes)
    else:
        assert False

    feature_batch = Symmetry.make_symmetry_batch(board_feature_planes)
    feed_dict = {self.feature_planes: feature_batch}
    logit_batch = self.sess.run(self.logits, feed_dict)
    move_logits = Symmetry.average_plane_over_symmetries(logit_batch, self.model.N)
    softmax_temp = 1.0
    move_probs = softmax(move_logits, softmax_temp)

    # zero out illegal moves
    for x in xrange(self.model.N):
        for y in xrange(self.model.N):
            ind = self.model.N * x + y
            if not self.board.play_is_legal(x, y, color):
                move_probs[ind] = 0
    sum_probs = np.sum(move_probs)
    if sum_probs == 0:
        return Move.Pass()  # no legal moves, pass
    move_probs /= sum_probs  # re-normalize probabilities

    pick_best = True
    if pick_best:
        move_ind = np.argmax(move_probs)
    else:
        move_ind = sample_from(move_probs)
    move_x = move_ind / self.model.N
    move_y = move_ind % self.model.N

    self.last_move_probs = move_probs.reshape((self.board.N, self.board.N))
    return Move(move_x, move_y)
def test_Features_detect_kp_ORB_vs_clean_class(self):
    img = self.image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    kp_ORB = self.feats.detect_kp_ORB(ngray)
    tmpfeats = Features()
    tmp_kp_ORB = tmpfeats.detect_kp_ORB(ngray)
    self.assertEqual(kp_ORB, tmp_kp_ORB)
def test_equalizeHistograms(self):
    equalHist = Normalization.equalizeHistograms(self.img)
    img = self.img
    gr = img
    if len(img.shape) == 3:
        gr = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    tmpEqualHist = cv2.equalizeHist(gr)
    equalityBool = (equalHist == tmpEqualHist).all()
    self.assertTrue(equalityBool)
def test_simpleNorm(self):
    simpleNorm = Normalization.simpleNorm(self.img)
    img = self.img
    imgc = numpy.zeros_like(img)
    for i in range(img.shape[2]):
        imgc[:, :, i] = img[:, :, i] * 255.0 / img[:, :, i].max()
    equalityBool = (simpleNorm == imgc).all()
    self.assertTrue(equalityBool)
def test_Edges_sumCanny_vs_raw_calculation(self):
    img = self.image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    sumCann = self.edges.sumCanny(ngray, 1, 255)
    tmpCann = cv2.Canny(ngray, 1, 255)
    tmpSumCann = numpy.sum(tmpCann)
    self.assertEqual(tmpSumCann, sumCann)
def test_FFT_fft_vs_raw_calculation(self):
    img = self.image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    fftImage = self.fft.fft(ngray)
    tmpFftImage = numpy.fft.fft2(ngray)
    tmpBool = (fftImage == tmpFftImage).all()
    self.assertTrue(tmpBool)
def readData(filename, model):
    if model == 2:
        print("model {}".format(model))
        data = norm.readData2(filename)
        data, mean, std = norm.NormalizetheData2(data)
    else:
        print("model = 1")
        data = norm.readData(filename)
        data, mean, std = norm.NormalizetheData(data)
    print(filename)
    print("-----")
    # print("columns to be normalized: {}".format(len(data[0][0])))
    slash = filename.rfind('/')
    ext = filename.rfind('.')
    normfile = filename[slash + 1:ext] + "_norm.csv"
    # print("Normalized File: {}".format(normfile))
    with open("../Data/{}".format(normfile), "w") as f:
        writer = csv.writer(f)
        writer.writerows(data)
    return data, mean, std
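# norm.NormalizetheData / norm.NormalizetheData2 are assumed above to return the z-scored
# data together with the per-column mean and standard deviation, so test data can later be
# scaled with the same statistics. A minimal sketch under that assumption (not the
# project's actual implementation):
import numpy as np

def NormalizetheData(data):
    data = np.asarray(data, dtype=float)
    mean = data.mean(axis=0)
    std = data.std(axis=0)
    # z-score each column and hand back the statistics for reuse
    return (data - mean) / std, mean, std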
def main(argv):
    classifiers = ['RF', 'KNN', 'DT', 'SVM', 'RNA']  #,'nn','sgd']
    #classifiers = ['RF']  #,'KNN','DT','SVM','RNA'] #,'nn','sgd']
    ds = pd.read_excel('../MinerText/tabelas/data_new.xlsx').dropna()
    X, y = FL.Selection(ds, ['OBJETIVO_ONU'])  # all_data = class_Rob / ||| output = IC_Rob / IC_Sch
    '''
    vet = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
    for i in range(2267):
        vet[y.iloc[i][0]] += 1
    '''
    # TODO: get the number of classes in the output automatically
    y_size = 9  # in the 'output' dataset there are 6 classes
    #X = X[['ALUNOS_ENVOLVIDOS', 'CLASSIFICACAO', 'NUM_PUB_ALVO','SUBUNIDADE_ENSINO']]
    X_norm = Normalization.normalize(X)

    ### FEATURE SELECTION
    # two distinct ways to select features: kbest or fittoclassifier, which is better?
    #columns = FL.ExtraTree(X, y[y.columns[0]], number=5)
    #columns = FL.kBest(X_norm, y[y.columns[0]], k=5)
    #FL.corr_HeatMap(ds)
    #columns = ['Q','Qt','s1v0','fs','qc']  #['Qt','fs','qc','qt','u2'] #['qc', 'fs', 'u2'] #['Q','Qt','s1v0','fs','qc'] #'qc', 'fs', 'u2', 'sv0' #'Qt','fs','qc','qt','u2'
    #X = X[columns]

    y = y[y.columns[0]].values
    #X_norm = Normalization.normalize(X)
    scaler = StandardScaler()
    scaler.fit(X)
    X_norm = scaler.transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.3)

    #clf = ''
    #clf = Tunning.DT(X_train, y_train)
    for cla in classifiers:
        print('>> GRIDSEARCH RUNNING on: ', cla)
        print()
        ###exec("clf = Tunning." + cla + "(X_train, y_train)")
        clf = Tunning.models(cla, X_train, y_train, y_size)

        ### PREDICTING
        y_pred = clf.predict(X_test)
        print("Prediction score [", cla, ']:')
        print(clf.score(X_test, y_test))
        print(classification_report(y_test, y_pred))
        print('-------------------------------------------')
def test_Features_numberKeyPoints_vs_clean_class(self):
    img = self.image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    kp_ORB = self.feats.detect_kp_ORB(ngray)
    tmpfeats = Features()
    tmpfeats.detect_kp_ORB(ngray)
    pts = self.feats.numberKeyPoints()
    tmpPts = tmpfeats.numberKeyPoints()
    self.assertEqual(pts, tmpPts)
def getAllTokens(fileNum):
    # Create inverted index, loop through all articles in one file
    invertedIndex = {}
    tokensLength = open('invertedIndex/tokensLength', 'a')
    Content = open('invertedIndex/Content', 'a')
    # load the original documents for tokenization and normalization later
    fileNum = '%0*d' % (3, fileNum)
    resourcepath = 'reuters/reut2-' + fileNum + '.sgm'
    file = open(resourcepath)
    soup = BeautifulSoup(file, 'html.parser')
    for doc in soup.find_all('reuters'):
        docId = int(doc['newid'].encode('utf8'))
        tokenCounter = 0
        if doc.body is not None:
            content = doc.body.text
            length = len(content)
            Content.write(str(docId) + ' Start ' + content.encode('utf8') + ' End ')
            tokens = Tokenization.tokenization(content)
            for token in tokens:
                # Normalization
                token = Normalization.cleanedTokens(token)
                token = Normalization.caseFoldedTokens(token)
                token = Normalization.cleanStopWords150(token)
                token = Normalization.stemmedTokens(token)
                if token != '':
                    tokenCounter += 1
                    # Add to the postings list if the word exists
                    if invertedIndex.has_key(token):
                        if invertedIndex[token].has_key(docId):
                            tf = invertedIndex[token][docId]
                            invertedIndex[token][docId] = tf + 1
                        else:
                            invertedIndex[token][docId] = 1
                    else:
                        invertedIndex[token] = {docId: 1}
        tokensLength.write(str(docId) + ':' + str(tokenCounter) + '\n')
    tokensLength.close()
    Content.close()
    return invertedIndex
def test_Features_meanKeyPointSize_vs_clean_class(self):
    img = self.image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    self.feats.detect_kp_ORB(ngray)
    mean = self.feats.meanKeyPointSize()
    tmpfeats = Features()
    tmpfeats.detect_kp_ORB(ngray)
    tmpMean = numpy.mean(tmpfeats.keypointsizes)
    self.assertEqual(mean, tmpMean)
def test_Edges_Canny_vs_raw_calculation(self):
    """!!!Note: This is a helper method called within Edges.sumCanny()."""
    img = self.image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    cann = self.edges.Canny(ngray, 1, 255)
    tmpCann = cv2.Canny(ngray, 1, 255)
    tmpBool = (cann == tmpCann).all()
    self.assertTrue(tmpBool)
def main():
    """
    The program must accept two command line arguments:
        -train.json
        -test.json
    """
    # first handle user input
    trainJSONData, testJSONData = command_parser()

    # import the text processing modules after checking user input
    import Normalization
    import Tokenization

    # init text processing classes
    global normalization, tokenization
    normalization = Normalization.Normalizer()
    tokenization = Tokenization.Tokenizer()

    print("Pre-processing begin >>>>>>>>")
    # Perform data pre-processing (text processing and get each document's terms)
    Document_vectors, corpus, number_of_document, corpus_count = pre_processing(trainJSONData)
    print("<<<<<<<< Pre-processing done")

    # apply kNN
    best_accuracy = -1
    best_k = -1
    decrease = 0
    k_parameter_accuracy = []
    # try different values of the parameter k
    # until the accuracy decreases twice in a row, then stop
    for k in range(1, number_of_document):
        print("Apply kNN begin with K=%d >>>>>>>>" % (k))
        accuracy = apply_kNN_on_test_documents(testJSONData, Document_vectors, corpus,
                                               number_of_document, corpus_count, k)
        k_parameter_accuracy.append(accuracy)
        print("<<<<<<<< Apply kNN done with K=%d" % (k))
        print("Accuracy: " + str(accuracy) + " with K=%d" % (k))
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_k = k
        if k > 1 and accuracy < k_parameter_accuracy[k - 2]:
            decrease += 1
            if decrease == 2:  # two consecutive decreases
                print("Two consecutively decreasing accuracies! Stop here")
                break
        print("")
    print("Best Accuracy: %f with parameter K=%d" % (best_accuracy, best_k))
def test_Laplacian_sum_vs_raw_calculation(self):
    img = self.image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    kern = 9
    lapSum = self.lap.sum(ngray, kern)
    tmpLap = Laplacian()
    gl = tmpLap.calculate(ngray, kern)
    glo = gl > 10
    tmpLapSum = numpy.sum(glo)
    self.assertEqual(lapSum, tmpLapSum)
def test_Features_detect_kp_ORB_vs_raw_calculation(self):
    img = self.image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    self.feats.detect_kp_ORB(ngray)
    orbdetector = cv2.FeatureDetector_create('ORB')
    keypointsizes = []
    keypoints = orbdetector.detect(ngray, None)
    for k in keypoints:
        keypointsizes.append(k.size)
    keypointsizes = numpy.array(keypointsizes)
    tmpBool = (self.feats.keypointsizes == keypointsizes).all()
    self.assertTrue(tmpBool)
def test_Laplacian_calculate_vs_raw_calculation(self):
    """!!!Note: This is a helper method called from within Laplacian.sum()."""
    img = self.image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    kern = 3
    lapCalc = self.lap.calculate(img, kern)
    tmpLapCalc = cv2.Laplacian(gray, cv2.CV_16U, ksize=kern)
    tmpBool = (lapCalc == tmpLapCalc).all()
    self.assertTrue(tmpBool)
def run():
    time.clock()
    t0 = float(time.clock())

    # load data from file, and do normalization on X.
    [trainX, trainY, testX, testY] = ld.LoadData()
    t1 = float(time.clock())
    print 'Loading data from File. using time %.4f s, \n' % (t1 - t0)

    [trainX, testX] = nor.Normalization(trainX, testX)
    t2 = float(time.clock())
    print 'Normalization on train & test X. using time %.4f s, \n' % (t2 - t1)

    # implementation assignments
    lr_reg = [0.001, 0.01, 0.1, 1, 10, 100]  # learning rates
    max_iter = 1000000  # max iterations
    eps = 0.001  # gradient comparison epsilon
    lmd_reg = [0, 0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 100]  # regularization lambdas

    # part 1: lambda = 0, different learning rates
    best_lr = run_part1(trainX, trainY)  # default lr, grad_epsilon and max_iterations
    # [lr,bestloss,weight,lossCont] = HW1_part_1(trainX,trainY)  # default lr, grad_epsilon and max_iterations
    t3 = float(time.clock())
    print 'Part 1, lambda = 0, changing lr, using time %.4f s, \n' % (t3 - t2)

    # part 2: fixed learning rate, different lambdas
    max_iter = 10000
    run_part2(trainX, trainY, testX, testY, lmd_reg, best_lr, eps, max_iter)
    t4 = float(time.clock())
    print 'Part 2, lr = 0.05, changing lmd, using time %.4f s, \n' % (t4 - t3)

    # part 3: fixed lr, using 10-fold cross-validation
    # split training data into k parts
    max_iter = 1000
    k = 10
    run_part3(trainX, trainY, testX, testY, best_lr, eps, max_iter, lmd_reg, k)
    t5 = float(time.clock())
    print 'Part 3, lr = 0.05, finding the best lmd, using time %.4f s, \n' % (t5 - t4)
def predict():
    '''
    For rendering results on the HTML GUI
    '''
    float_features = [float(x) for x in request.form.values()]
    final_features = [np.array(float_features)]
    max_value = [float(77), float(1), float(4), float(200), float(603), float(1), float(2),
                 float(202), float(1), float(6.2), float(3), float(9), float(7)]
    min_value = [float(28), float(0), float(1), float(0), float(0), float(0), float(0),
                 float(60), float(0), float(-2.6), float(0), float(0), float(1)]
    final_features.append(max_value)
    final_features.append(min_value)
    temp_scales = Normalization.rescaling_with_MinMaxScaler_sklearn(final_features)
    prediction = model.predict([np.array(temp_scales[0])])

    output = prediction[0]
    if output == 1:
        ketluan = "is"
    else:
        ketluan = "is not"
    return render_template('index.html',
                           prediction_text='the patient {} at risk of cardiovascular disease...'.format(ketluan))
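# Normalization.rescaling_with_MinMaxScaler_sklearn is not shown on this page. The caller
# above appends a row of per-feature maxima and a row of minima before scaling, which
# suggests the helper simply fits sklearn's MinMaxScaler on whatever rows it receives.
# A hedged sketch under that assumption (hypothetical, not the project's actual code):
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def rescaling_with_MinMaxScaler_sklearn(rows):
    # fit a [0, 1] min-max scaler on the given rows and return the rescaled matrix
    return MinMaxScaler().fit_transform(np.asarray(rows, dtype=float))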
def imageHandler(imgfile):
    retStrings = []
    if imgfile in seen:
        return
    if os.path.isdir(os.path.join(args.directory, imgfile)):
        temp = os.listdir(os.path.join(args.directory, imgfile))
        for f in temp:
            images.append(os.path.join(imgfile, f))
        return
    # silently skip the bin files that have the gps data
    if imgfile.endswith('bin'):
        return
    # alert to other files that were skipped
    if not (imgfile.endswith('png') | imgfile.endswith('jpg')):
        sys.stderr.write("Skipped file: " + imgfile + "\n")
        return
    if args.verbose:
        sys.stderr.write("Parsing " + imgfile + "\n")
    retStrings.append(imgfile + "\t")
    img = ImageIO.cv2read(os.path.join(args.directory, imgfile))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    ngray = cv2.GaussianBlur(ngray, (3, 3), 0)
    feats.detect_kp_ORB(ngray)
    retStrings.append(str(feats.numberKeyPoints()) + "\t" + str(feats.medianKeyPointSize()) + "\t" + str(feats.meanKeyPointSize()))
    for i in range(15):
        retStrings.append("\t" + str(feats.numKeyPoints(i * 10)))
    retStrings.append("\n")
    return retStrings
def chooseZscoreSchemeFromFiles(folder, beta_contents, covariance_entries, weight_db_logic):
    beta_content = beta_contents[0]
    beta_path = os.path.join(folder, beta_content)
    zscore_scheme = None
    normalization_scheme = None
    with gzip.open(beta_path) as content:
        header = content.readline().strip()
        # So. If beta_z is present, just go for "modified formula" with reference variance.
        # Any other option has to be specifically chosen.
        if "beta_z" in header:
            zscore_scheme = ZScoreCalculation.BETA_Z_SIGMA_REF
            normalization_scheme = Normalization.NONE
        elif "beta" in header and "sigma_l" in header:
            zscore_scheme = ZScoreCalculation.METAXCAN
            normalization_scheme = _chooseNormalization(header)
        elif "beta" in header and not "sigma_l" in header:
            zscore_scheme = ZScoreCalculation.METAXCAN_FROM_REFERENCE
            normalization_scheme = _chooseNormalization(header)
        else:
            raise Exception("Couldn't infer data from beta file header")

    logging.info("Chose zscore scheme '%s' and normalization '%s'", zscore_scheme, normalization_scheme)
    zscore_calculation = ZScoreCalculation.ZScoreScheme(zscore_scheme)
    normalization = Normalization.normalizationScheme(normalization_scheme, covariance_entries, weight_db_logic)
    return zscore_calculation, normalization
def imageWriter(images, seen, args, fout, classification, stats, fft, lap, edge):
    for imgfile in images:
        if imgfile in seen:
            continue
        if os.path.isdir(os.path.join(args.directory, imgfile)):
            temp = os.listdir(os.path.join(args.directory, imgfile))
            for f in temp:
                images.append(os.path.join(imgfile, f))
            continue
        # rewrite of the if statement above
        #if os.path.isdir(os.path.join(args.directory, imgfile)):
        #    temp = os.listdir(os.path.join(args.directory, imgfile))
        #    pool = Pool()
        #    pool.map(images.append, os.path.join(imgfile, f))
        #    pool.close()
        #    pool.join()
        #    continue
        if not args.all and imgfile not in classification:
            continue
        # silently skip the bin files that have the gps data
        if imgfile.endswith('bin'):
            continue
        # alert to other files that were skipped
        if not (imgfile.endswith('png') | imgfile.endswith('jpg')):
            sys.stderr.write("Skipped file: " + imgfile + "\n")
            continue
        if args.verbose:
            sys.stderr.write("Parsing " + imgfile + "\n")
        fout.write(imgfile + "\t")
        if imgfile in classification:
            fout.write(classification[imgfile] + "\t")
        else:
            fout.write("unknown\t")
        img = ImageIO.cv2read(os.path.join(args.directory, imgfile))
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        fout.write(('\t'.join(map(str, [stats.min(gray), stats.max(gray), stats.median(gray), stats.mean(gray)]))) + "\t")
        ngray = Normalization.equalizeHistograms(gray)
        # apply a gaussian blur to remove edge effects
        ngray = cv2.GaussianBlur(ngray, (3, 3), 0)
        fout.write(('\t'.join(map(str, [stats.min(ngray), stats.max(ngray), stats.median(ngray), stats.mean(ngray)]))) + "\t")
        for i in range(3):
            imp = img[:, :, i]
            fout.write(('\t'.join(map(str, [stats.min(imp), stats.max(imp), stats.median(imp), stats.mean(imp)]))) + "\t")
        fout.write(str(fft.energy(gray)) + "\t" + str(fft.energy(ngray)) + "\t")
        if args.features:
            feats.detect_kp_ORB(ngray)
            fout.write(str(feats.numberKeyPoints()) + "\t" + str(feats.medianKeyPointSize()) + "\t" + str(feats.meanKeyPointSize()) + "\t")
            for i in range(15):
                fout.write(str(feats.numKeyPoints(i * 10)) + "\t")
        else:
            fout.write("0\t0\t0\t")
            for i in range(15):
                fout.write("0\t")
        for i in range(15):
            k = 2 * i + 1
            fout.write(str(lap.sum(ngray, k)) + "\t")
        for i in range(25):
            t2 = 10 * i
            fout.write(str(edge.sumCanny(ngray, 1, t2)) + "\t")
        #edge.sumCanny(gray)
        # Contour detection
        ctr = Contours.contours(ngray)
        for i in range(5):
            threshold = 50 * i
            ctr.withCanny(1, threshold)
            if ctr.numberOfContours() == 0:
                fout.write("0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t")
            else:
                try:
                    fout.write("\t".join(map(str, [ctr.numberOfContours(), ctr.numberOfClosedContours(),
                                                   ctr.numberOfOpenContours(), ctr.totalContourArea(),
                                                   cv2.contourArea(ctr.largestContourByArea()),
                                                   ctr.totalPerimeterLength()])) + "\t")
                    ctr.linelengths()
                    fout.write("\t".join(map(str, [ctr.maxLineLength(), ctr.meanLineLength(),
                                                   ctr.medianLineLength(), ctr.modeLineLength()])) + "\t")
                except Exception as e:
                    sys.stderr.write("There was an error calculating the contours for " + imgfile + ": " + e.message + "\n")
                    fout.write("0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t")
        fout.write("\n")
def get_style_model_and_losses(device, cnn, normalization_mean, normalization_std,
                               style_img, content_img,
                               content_layers=content_layers_default,
                               style_layers=style_layers_default):
    cnn = copy.deepcopy(cnn)

    # normalization module
    normalization = norm.Normalization(normalization_mean, normalization_std).to(device)

    # just in order to have iterable access to the lists of content/style losses
    content_losses = []
    style_losses = []

    # assuming that cnn is an nn.Sequential, we make a new nn.Sequential
    # to put in modules that are supposed to be activated sequentially
    model = nn.Sequential(normalization)

    i = 0  # increment every time we see a conv
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            i += 1
            name = 'conv_{}'.format(i)
        elif isinstance(layer, nn.ReLU):
            name = 'relu_{}'.format(i)
            # The in-place version doesn't play very nicely with the ContentLoss
            # and StyleLoss we insert below, so we replace it with an out-of-place one here.
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = 'pool_{}'.format(i)
        elif isinstance(layer, nn.BatchNorm2d):
            name = 'bn_{}'.format(i)
        else:
            raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))

        model.add_module(name, layer)

        if name in content_layers:
            # add content loss:
            target = model(content_img).detach()
            content_loss = cl.ContentLoss(target)
            model.add_module("content_loss_{}".format(i), content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            # add style loss:
            target_feature = model(style_img).detach()
            style_loss = sl.StyleLoss(target_feature)
            model.add_module("style_loss_{}".format(i), style_loss)
            style_losses.append(style_loss)

    # now we trim off the layers after the last content and style losses
    for i in range(len(model) - 1, -1, -1):
        if isinstance(model[i], cl.ContentLoss) or isinstance(model[i], sl.StyleLoss):
            break
    model = model[:(i + 1)]

    return model, style_losses, content_losses
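# norm.Normalization above wraps the per-channel mean/std normalization as an nn.Module so
# it can be placed first in the nn.Sequential. A sketch modeled on the standard PyTorch
# neural-style tutorial; the project's own module may differ:
import torch
import torch.nn as nn

class Normalization(nn.Module):
    def __init__(self, mean, std):
        super(Normalization, self).__init__()
        # register as buffers shaped [C, 1, 1] so .to(device) moves them with the module
        # and they broadcast over image batches of shape [B, C, H, W]
        self.register_buffer('mean', torch.as_tensor(mean).view(-1, 1, 1))
        self.register_buffer('std', torch.as_tensor(std).view(-1, 1, 1))

    def forward(self, img):
        return (img - self.mean) / self.std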
                            (float(row[3]) if not (row[3] == '-9') else np.NaN), \
                            (float(row[4]) if not (row[4] == '-9') else np.NaN), \
                            (float(row[5]) if not (row[5] == '-9') else np.NaN), \
                            (float(row[6]) if not (row[6] == '-9') else np.NaN), \
                            (float(row[7]) if not (row[7] == '-9') else np.NaN), \
                            (float(row[8]) if not (row[8] == '-9') else np.NaN), \
                            (float(row[9]) if not (row[9] == '-9') else np.NaN), \
                            (float(row[10]) if not (row[10] == '-9') else np.NaN), \
                            (float(row[11]) if not (row[11] == '-9') else np.NaN), \
                            (float(row[12]) if not (row[12] == '-9') else np.NaN), \
                            (float(row[13]) if (float(row[13]) == 0) else 1)])
            # y.append(float(row[13]))
            if (float(row[13]) == 0):
                patient0.append(temp)
            else:
                patient1.append(temp)
            # if line_count == 10: break  # limit to 10 samples
            line_count += 1

    patient = np.append(patient0, patient1, axis=0)
    # patient_with_most_frequent = missing_values_with_most_frequent_values(patient)
    patient_with_the_mean_values = missing_values_with_the_mean_values(patient)
    # patient_with_remove_missing_values = remove_missing_values(patient)
    patient_scales = Normalization.rescaling_with_MinMaxScaler_sklearn(patient_with_the_mean_values)
    patient, patient_with_most_frequent, patient_with_the_mean_values, patient_with_remove_missing_values = None, None, None, None
        # visualize the radial partition result
        # img = DrawFeature(feature_select1, feature_select2)
        # # img_path1 = csv_list_same[i][0].replace('.csv', '.jpg')
        # # img_path2 = csv_list_same[i][1].replace('.csv', '.jpg')
        # # img.draw(img_path1, img_path2)
        feature_vec1 = FeatureVec(feature_select1.x_fv, feature_select1.y_fv,
                                  feature_select1.featuredirect).feature_vec_common
        feature_vec2 = FeatureVec(feature_select2.x_fv, feature_select2.y_fv,
                                  feature_select2.featuredirect).feature_vec_common
        l_s1 = len(feature_vec1)
        l_s2 = len(feature_vec2)
        # discard prints with fewer than 3 features, since the radial partition needs at least 3
        if l_s1 < 3 or l_s2 < 3:
            continue
        # normalize the feature matrices
        featurevec1 = Normalization(max, min).normalize(feature_vec1)
        featurevec2 = Normalization(max, min).normalize(feature_vec2)
        # rotate the feature index list of the same-source shoeprint one element to the left at a time
        l_s = len(featurevec1)
        matrix = list(range(l_s))
        # all index orderings produced by the rotations
        m = roll_list(matrix)
        d_combine = []
        for l in m:
            fv_modify = featurevec2[l, :]
            d = CalcFeatureDistance(featurevec1, fv_modify).distance
            d_combine.append(d)
        # sort the distances over all rotations and take the minimum
        d_combine.sort()
        d = d_combine[0]
    Similarity_Result = calculateSimilarity(name, nameList)
    actual_NoOf_Post = actualNumberOfPost(name, nameList)
    length_in_comment = get_average_coment_Length(nameCommentMention, nameList)

    # Add Nuwan's method
    FeatureExtraction.Uptodatedness_score()
    averge_updatness = get_average_updatness()
    FinalInforScore, FinalEmoScore = apeend_Support_level(nameList)

    Print_final_featuers(nameList, Page_Rank_Result, Final_hubs, Final_authorities,
                         Similarity_Result, Final_Number_Of_post_Count, pr, actual_NoOf_Post,
                         length_in_comment, averge_updatness, FinalInforScore, FinalEmoScore)

    # normalization method call
    Normalization.normalization()
    # regression call
    Regression_model.getScore_Using_Regression_Model()
    # ranking call
    rankModel.rank_scores()
def main():
    """
    The program must accept two command line arguments: the first is the directory containing
    the documents to be indexed, and the second must be the directory where the index will be stored.
    """
    # first handle user input
    if len(sys.argv) != 3:
        # number of arguments is not correct
        print("Two arguments are needed:")
        print("1. the directory containing the documents to be indexed")
        print("2. the directory where the index will be stored")
        return
    docDir = sys.argv[1]
    indexDir = sys.argv[2]
    if not os.path.isdir(docDir) or not os.path.isdir(indexDir):
        # the given input directories are invalid
        print("The given directory is invalid")
        return
    # append / if not present in the directory
    if docDir[-1] != "/":
        docDir += "/"
    if indexDir[-1] != "/":
        indexDir += "/"
    if indexDir == "/":
        indexDir = "." + indexDir
    if docDir == "/":
        docDir = "." + docDir

    # retrieve all documents in the given directory
    allDoc = []
    for subDir in os.walk(docDir):
        # recursively retrieve all files in each subDir
        # docDir is also a subDir of itself
        for doc in subDir[2]:  # all documents in subDir
            allDoc.append(doc)

    ###################################################################################################
    # initialization for building the index
    import Normalization
    import Tokenization
    import SQLite3database

    # init text processing classes
    normalization = Normalization.Normalizer()
    tokenization = Tokenization.Tokenizer()

    # create a SQLite3 database
    indexDatabase = SQLite3database.Database(indexDir + "index.db")
    # create title index database
    titleDatabase = SQLite3database.Database(indexDir + "title.db")

    # create tables
    createTable(indexDatabase)
    createTable(titleDatabase)

    # init final insert strings
    indexDatabase.initInsertString()
    indexDatabase.addBeginTransactionString()
    titleDatabase.initInsertString()
    titleDatabase.addBeginTransactionString()

    # initializing insert strings
    insertDocument = "INSERT INTO document VALUES"
    insertDictionary = "INSERT INTO dictionary VALUES"
    insertTermPosition = "INSERT INTO termPosition VALUES"
    insertDocumentFrequency = "INSERT INTO documentFrequency VALUES"
    insertTermFrequency = "INSERT INTO termFrequency VALUES"
    insertDocumentTitle = "INSERT INTO document VALUES"
    insertDictionaryTitle = "INSERT INTO dictionary VALUES"
    insertTermPositionTitle = "INSERT INTO termPosition VALUES"
    insertDocumentFrequencyTitle = "INSERT INTO documentFrequency VALUES"
    insertTermFrequencyTitle = "INSERT INTO termFrequency VALUES"

    # store the document frequency of each vocabulary term
    dictionary = {}  # all vocabulary over all documents (term as key, document frequency as value)
    titleDic = {}

    for doc in allDoc:
        # First read and process text from the current document
        # open file to read
        text = open(docDir + doc, "r").read()
        noTxt = doc.rstrip(".txt")
        title = " ".join(noTxt.split("_")[2:])
        # process raw text from document
        tokens = cleanText(text, tokenization, normalization)  # a list of terms after tokenization and normalization
        titleTokens = cleanText(title.lower(), tokenization, normalization)

        # Then traverse the term/vocabulary list and record the information
        #   -position
        #   -count
        # init
        termFrequency = {}  # (vocabulary and documentID as key, term frequency as value)
        titleTermFrequency = {}
        documentID = int(doc.split("_")[1])  # extract document ID
        insertDocument += """ ({docID},"{docName}",{docLength}),""".format(docID=documentID, docName=doc, docLength=len(tokens))
        insertDocumentTitle += """ ({docID},"{docName}",{docLength}),""".format(docID=documentID, docName=doc, docLength=len(titleTokens))
        alreadyIncrement = {}  # used to check whether the document frequency was already incremented for this document
        alreadyIncrementTitle = {}
        for index, token in enumerate(tokens):
            # insert the position of this token in the document
            insertTermPosition += """ ("{word}",{docID},{position}),""".format(word=token, docID=documentID, position=index + 1)
            if token not in dictionary:
                dictionary[token] = 1
                alreadyIncrement[token] = None
                # insert if this token is encountered for the first time overall
                insertDictionary += """ ("{word}"),""".format(word=token)
            elif token not in alreadyIncrement:
                dictionary[token] += 1
                alreadyIncrement[token] = None
            if token not in termFrequency:
                termFrequency[token] = 1
            else:
                termFrequency[token] += 1
        for key, val in termFrequency.items():
            insertTermFrequency += """ ("{word}",{docID},{termFreq}),""".format(word=key, docID=documentID, termFreq=val)

        for index, token in enumerate(titleTokens):
            # insert the position of this token in the document title
            insertTermPositionTitle += """ ("{word}",{docID},{position}),""".format(word=token, docID=documentID, position=index + 1)
            if token not in titleDic:
                titleDic[token] = 1
                alreadyIncrementTitle[token] = None
                # insert if this token is encountered for the first time overall
                insertDictionaryTitle += """ ("{word}"),""".format(word=token)
            elif token not in alreadyIncrementTitle:
                titleDic[token] += 1
                alreadyIncrementTitle[token] = None
            if token not in titleTermFrequency:
                titleTermFrequency[token] = 1
            else:
                titleTermFrequency[token] += 1
        for key, val in titleTermFrequency.items():
            insertTermFrequencyTitle += """ ("{word}",{docID},{termFreq}),""".format(word=key, docID=documentID, termFreq=val)

    # insert the document frequency
    for key, val in dictionary.items():
        insertDocumentFrequency += """ ("{word}",{docFrequency}),""".format(word=key, docFrequency=val)
    for key, val in titleDic.items():
        insertDocumentFrequencyTitle += """ ("{word}",{docFrequency}),""".format(word=key, docFrequency=val)

    # get rid of the ',' at the end of each insert string
    # and replace it with ';'
    insertDocument = insertDocument[:-1] + ";"
    insertDictionary = insertDictionary[:-1] + ";"
    insertTermPosition = insertTermPosition[:-1] + ";"
    insertTermFrequency = insertTermFrequency[:-1] + ';'
    insertDocumentFrequency = insertDocumentFrequency[:-1] + ";"
    insertDocumentTitle = insertDocumentTitle[:-1] + ";"
    insertDictionaryTitle = insertDictionaryTitle[:-1] + ";"
    insertTermPositionTitle = insertTermPositionTitle[:-1] + ";"
    insertTermFrequencyTitle = insertTermFrequencyTitle[:-1] + ';'
    insertDocumentFrequencyTitle = insertDocumentFrequencyTitle[:-1] + ";"

    # add all insert strings to the final insert string
    indexDatabase.addInsertString(insertDocument)
    indexDatabase.addInsertString(insertDictionary)
    indexDatabase.addInsertString(insertTermPosition)
    indexDatabase.addInsertString(insertTermFrequency)
    indexDatabase.addInsertString(insertDocumentFrequency)
    indexDatabase.addCommitString()
    indexDatabase.execute(indexDatabase.getInsertString())
    createBtreeIndex(indexDatabase)
    indexDatabase.close()

    titleDatabase.addInsertString(insertDocumentTitle)
    titleDatabase.addInsertString(insertDictionaryTitle)
    titleDatabase.addInsertString(insertTermPositionTitle)
    titleDatabase.addInsertString(insertTermFrequencyTitle)
    titleDatabase.addInsertString(insertDocumentFrequencyTitle)
    titleDatabase.addCommitString()
    titleDatabase.execute(titleDatabase.getInsertString())
    createBtreeIndex(titleDatabase)
    titleDatabase.close()
A test program to make sure that the Normalization methods work.
This normalizes the images and displays them.
'''

#test = 'test.jpg'
test = "/home/redwards/Dropbox/ComputerVision/TestCode/test.png"
im = ImageIO.cv2read(test)
print im.shape

print "Testing Tyler's normalization"
tn = Normalization.Tyler(im)

print "Testing histogram equalization"
he = Normalization.equalizeHistograms(im)
heo = numpy.ones_like(im)
heo[:, :, 0] = he
heo[:, :, 1] = he
heo[:, :, 2] = he

print "Simple normalization"
nh = Normalization.simpleNorm(im)
print nh.shape

partone = numpy.vstack([im, heo])
parttwo = numpy.vstack([tn, nh])
allim = numpy.hstack([partone, parttwo])

print "Press any key to exit\n"
    data = np.delete(data, i)

# Load the results from training
kernels = ['rbf', 'linear']
for i in range(NUM_CONFIG):
    if not i == 0:
        plt.show(block=False)
    filename = file + "_" + kernels[i] + ".pkg"
    loadData = pickle.load(open(filename, 'rb'))
    clf = loadData['model']
    Method = loadData['Method']
    Norm = loadData['Norm']
    if Norm == "MeanStd":
        print("Using MeanStd")
        temp_thetaBandPowerFeature1 = Normalization.normalizeDataMeanStd(
            np.asarray(thetaBandPowerFeature1), loadData['mean'][0], loadData['std'][0])
        temp_alphaBandPowerFeature1 = Normalization.normalizeDataMeanStd(
            np.asarray(alphaBandPowerFeature1), loadData['mean'][1], loadData['std'][1])
        temp_betaBandPowerFeature1 = Normalization.normalizeDataMeanStd(
            np.asarray(betaBandPowerFeature1), loadData['mean'][2], loadData['std'][2])
        temp_nonlinearEnergyFeature1 = Normalization.normalizeDataMeanStd(
            np.asarray(nonlinearEnergyFeature1), loadData['mean'][3], loadData['std'][3])
        temp_lineLengthFeature1 = Normalization.normalizeDataMeanStd(
            np.asarray(lineLengthFeature1), loadData['mean'][4], loadData['std'][4])
    elif Norm == "MinMax":
        print("Using MinMax")
def main():
    # First of all check the user input
    indexFilePath, k, printScore, queryTermString = checkInput()

    # open the database file that is given
    indexDatabase = SQLite3database.Database(sys.argv[1])  # this also handles file errors
    # cursor
    cursor = indexDatabase.getCursor()

    # check if the tables needed exist in the index storage file
    tablesNeeded = ["dictionary", "document", "termPosition", "documentFrequency", "termFrequency"]
    if checkIfTableNeedExist(indexDatabase, cursor, tablesNeeded) == False:
        print("The given index storage file does not contain the required tables.")
        indexDatabase.close()
        return

    # last check for k
    cursor.execute("SELECT COUNT(*) FROM document;")
    NumberOfDocument = cursor.fetchall()[0][0]
    if k > int(NumberOfDocument):
        print("The second argument k is larger than the number of documents in the input collection.")
        print("Argument k should be less than or equal to: %d" % (int(NumberOfDocument)))
        indexDatabase.close()
        sys.exit(-1)

    ##################################################################################################################################
    """
    At this point, all input has been validated and the database file has been opened.
    The database file has all the information representing each document's language model:
        -tf (term frequency) in each of the documents
        -document length for each document
    along with some other extra information.
    """
    # First of all, do text processing (clean text) on the query terms
    # (the same way that was done to the input data document terms)
    import Normalization
    import Tokenization
    normalization = Normalization.Normalizer()
    tokenization = Tokenization.Tokenizer()
    queryTermsList = cleanText(queryTermString, tokenization, normalization)
    print("Query Terms:")
    print(queryTermsList)

    # Perform the computation of the probability of generating the query terms under each document model
    topKdocument = ComputeProbabilityGeneratingQueryTerms(queryTermsList, cursor, k)

    if printScore == "y":
        print(" %4s %63s" % ("Document Name:", "Query Likelihood:"))
        for index, document in enumerate(topKdocument):
            print("%4d. %-60s" % (index + 1, document[0]), end="")
            print(document[1])
    else:
        print(" %4s" % ("Document Name:"))
        for index, document in enumerate(topKdocument):
            print("%4d. %-60s" % (index + 1, document[0]))

    # close the database file afterwards
    indexDatabase.close()
import Normalization
import Tokenization


def cleanText(text, tokenization, normalization):
    """
    Input: string of text
    Return: a list of terms/vocabulary after tokenization and normalization
    """
    # perform tokenization
    tokens = tokenization.tokenize(text)
    # perform normalization
    tokens = normalization.lemmatize(tokens)
    # get rid of non-meaningful characters left over after tokenization
    tokens = tokenization.getRidPuncuation(tokens)
    return tokens


normalization = Normalization.Normalizer()
tokenization = Tokenization.Tokenizer()
dd = cleanText("adad.adad ada...adad..ad 1941.http u.s.a. #Dadad #Rats sgsgs...", tokenization, normalization)
print(dd)
import DataInit, AddBias, GradientDescent, Normalization, runMachine
import os.path
import numpy as np

BASE = os.path.dirname(os.path.abspath(__file__))
print(os.path.join(BASE, "DataNew1.csv"))
Path = os.path.join(BASE, "DataNew1.csv")
# Path = "..\MachineLearningMF\Data.txt"

'''somethin somthin'''
data = DataInit.DataInit()
CostFuntion = runMachine.runMachine()
GDescent = GradientDescent.GradientDescent()
Norm = Normalization.Normalization()
AddBias1 = AddBias.AddBias()

'''load csv'''
data.loader(Path)  # remove ','

'''theta init'''
'''initiated optimal theta 17/9/2019'''
# test of entering theta values per district
# theta = np.array([[theta0], [theta1], [theta2], [theta3], [theta4]])
theta = np.array([[0], [0], [0], [0], [0]])

'''Normalize'''
data.x, mu, sigma = Norm.featureNormalize(data)

'''Add Bias Column'''
data = AddBias1.addB(data)

'''remove annotations when you compute theta again'''
'''run Gradient descent and Cost function'''
theta = GDescent.runGradient(data, theta, 0.0001, 100000)
# extract the theta values
theta = theta.reshape(1, 5)  # reshape for matrix multiplication
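# Norm.featureNormalize is assumed here to z-score each feature column of data.x and to
# return the normalized matrix together with the column means (mu) and standard deviations
# (sigma), so new samples can be scaled the same way. A hypothetical sketch of that method,
# not the actual Normalization class:
import numpy as np

def featureNormalize(data):
    X = np.asarray(data.x, dtype=float)
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    # z-score each column; mu and sigma are returned so predictions can reuse them
    return (X - mu) / sigma, mu, sigma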
        for f in temp:
            images.append(os.path.join(imgfile, f))
        continue
    # silently skip the bin files that have the gps data
    if imgfile.endswith('bin'):
        continue
    # alert to other files that were skipped
    if not (imgfile.endswith('png') | imgfile.endswith('jpg')):
        sys.stderr.write("Skipped file: " + imgfile + "\n")
        continue
    if args.verbose:
        sys.stderr.write("Parsing " + imgfile + "\n")
    fout.write(imgfile + "\t")
    img = ImageIO.cv2read(os.path.join(args.directory, imgfile))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ngray = Normalization.equalizeHistograms(gray)
    ngray = cv2.GaussianBlur(ngray, (3, 3), 0)
    feats.detect_kp_ORB(ngray)
    fout.write(str(feats.numberKeyPoints()) + "\t" + str(feats.medianKeyPointSize()) + "\t" + str(feats.meanKeyPointSize()))
    for i in range(15):
        fout.write("\t" + str(feats.numKeyPoints(i * 10)))
    fout.write("\n")
def main():
    print('My name is Abhishek Sinha')
    obj = Normalization.Normalize()
    obj.generateData()