def extract_features(pdfs_ben, pdfs_mal, csv_name):
    '''
    Extracts feature vectors from the given benign and malicious PDF
    files in parallel and writes the result to a CSV file.

    pdfs_mal are labeled 1.0 (positive class) and pdfs_ben 0.0. Files
    for which feature extraction fails (feats is None) are skipped.

    pdfs_ben -- iterable of benign PDF file names
    pdfs_mal -- iterable of malicious PDF file names
    csv_name -- path of the CSV file to write
    '''
    feat_vecs = []
    labels = []
    file_names = []
    # Extract malicious and benign features in a worker pool; the two
    # original copy-pasted loops differed only in the label value
    pool = multiprocessing.Pool()
    try:
        for label, pdfs in ((1.0, pdfs_mal), (0.0, pdfs_ben)):
            for pdf, feats in pool.imap(get_features, pdfs):
                if feats is not None:
                    feat_vecs.append(feats)
                    labels.append(label)
                    file_names.append(pdf)
    finally:
        # Shut the pool down explicitly instead of leaking worker
        # processes until interpreter exit
        pool.close()
        pool.join()
    # Convert the data points into a numpy array. numpy.zeros already
    # takes dtype/order, so the original numpy.array(numpy.zeros(...))
    # made a redundant extra copy.
    X = numpy.zeros((len(feat_vecs),
                     featureedit.FeatureDescriptor.get_feature_count()),
                    dtype=numpy.float64, order='C')
    for i, v in enumerate(feat_vecs):
        X[i, :] = v
    # Write the resulting CSV file
    datasets.numpy2csv(csv_name, X, labels, file_names)
def extract_features(pdfs_ben, pdfs_mal, csv_name):
    '''
    Extracts features from malicious and benign PDFs in parallel and
    saves them, with labels and file names, as a CSV dataset.

    Malicious samples get label 1.0, benign samples 0.0; any PDF whose
    feature extraction returns None is dropped.

    pdfs_ben -- iterable of benign PDF file names
    pdfs_mal -- iterable of malicious PDF file names
    csv_name -- path of the CSV file to write
    '''
    feat_vecs = []
    labels = []
    file_names = []
    # Run feature extraction across a process pool; collapse the two
    # near-identical loops into one parameterized by the label
    pool = multiprocessing.Pool()
    try:
        for label, pdf_list in ((1.0, pdfs_mal), (0.0, pdfs_ben)):
            for pdf, feats in pool.imap(get_features, pdf_list):
                if feats is not None:
                    feat_vecs.append(feats)
                    labels.append(label)
                    file_names.append(pdf)
    finally:
        # Terminate the worker processes deterministically; the
        # original never closed the pool
        pool.close()
        pool.join()
    # Assemble the feature matrix directly with numpy.zeros (the extra
    # numpy.array() wrapper in the original copied the buffer twice)
    X = numpy.zeros((len(feat_vecs),
                     featureedit.FeatureDescriptor.get_feature_count()),
                    dtype=numpy.float64, order='C')
    for i, v in enumerate(feat_vecs):
        X[i, :] = v
    # Write the resulting CSV file
    datasets.numpy2csv(csv_name, X, labels, file_names)
def fit(self, X, y):
    '''
    Trains a new random forest classifier.

    X -- 2D numpy array of feature vectors, one row per sample
    y -- labels corresponding to the rows of X

    The training data is round-tripped through a temporary CSV file so
    it can be loaded into the embedded R session; the fitted R
    randomForest model is stored under the global R name
    self.model_Rname.

    NOTE(review): tempfile.NamedTemporaryFile is reopened by name from
    R while still open here — this works on POSIX but not on Windows.
    '''
    # Serialize all access to the embedded R interpreter, which is a
    # shared global resource
    with _R_lock:
        with tempfile.NamedTemporaryFile() as tmpfile:
            datasets.numpy2csv(tmpfile, X, y)
            tmpfile.seek(0)
            # Read in the CSV file with the training samples, omitting the second column (filename)
            robjects.r('{train} <- read.csv("{csv}", header=TRUE, colClasses={cc})'.format(train=self.traindata_Rname, csv=tmpfile.name, cc=_r_colClasses))
            # Train a random forest of 1000 decision trees with mtry=43
            # variables sampled at each split (the original comment said
            # 33, which contradicted the code)
            robjects.r('{model} <- randomForest(x={train}[,-1], y={train}[,1], ntree=1000, mtry=43, importance=TRUE)'.format(model=self.model_Rname, train=self.traindata_Rname))
            # Mark the model as ready so decision_function's assertion passes
            self.model_trained = True
def decision_function(self, X):
    '''
    Classifies novel data points using a trained model.

    X -- 2D numpy array of feature vectors, one row per sample

    Returns a numpy array of shape (X.shape[0], 1) giving, per data
    point, the probability of belonging to the positive class.
    '''
    assert self.model_trained, 'Must train a model before classification'
    # Serialize all access to the embedded R interpreter
    with _R_lock:
        with tempfile.NamedTemporaryFile() as tmpfile:
            # Labels are irrelevant for prediction; write zeros as a
            # placeholder column
            datasets.numpy2csv(tmpfile, X, numpy.zeros((X.shape[0],)))
            tmpfile.seek(0)
            # Read in the CSV file with the samples to be classified,
            # omitting the second column (filename)
            robjects.r('{novel} <- read.csv("{csv}", header=TRUE, colClasses={cc})'.format(
                novel=self.noveldata_Rname, csv=tmpfile.name, cc=_r_colClasses))
            # Classify the new data points
            robjects.r('{pred} <- predict({model}, {novel}, type="prob")'.format(
                pred=self.predictions_Rname, model=self.model_Rname,
                novel=self.noveldata_Rname))
            # '{pred}'.format(...) was a no-op wrapper around the name
            predictions = list(robjects.r[self.predictions_Rname])
            # The first half of predictions is for the negative class,
            # so keep only the second half.
            # BUG FIX: use floor division — '/' yields a float under
            # Python 3, and a float slice index raises TypeError.
            predictions = predictions[len(predictions) // 2:]
            res = numpy.zeros((X.shape[0], 1))
            for i, p in enumerate(predictions):
                res[i] = p
            return res