def numpy2csv(csv_out, X, y, file_names=None):
    '''
    Creates a CSV file from the given data points (X, scipy matrix) and
    labels (y, numpy.array). The CSV file has a header. The first column is
    named 'class' and the others after PDFrate features. If 'file_names' is
    given and non-empty, a 'filename' column is inserted right after 'class'
    and filled with file_names[i] for row i.

    All features are written in their respective type format: TRUE/FALSE
    for booleans (values >= 0.5 count as TRUE), integers rounded to the
    nearest integer, everything else as formatted by '{}'.format().

    If 'csv_out' is an open Python file, it will not be reopened and it is
    left open on return. If it is a string, a file will be created with that
    name; that file is closed before returning, even when writing fails
    part-way through.
    '''
    we_opened_csvfile = isinstance(csv_out, str)
    # NOTE(review): binary mode is kept from the original. Under Python 3 a
    # file opened this way rejects the str writes below -- confirm the target
    # interpreter before changing the mode.
    csvfile = open(csv_out, 'wb+') if we_opened_csvfile else csv_out
    try:
        # Explicit None/length test instead of plain truthiness, so that a
        # numpy array of names does not raise an "ambiguous truth value"
        # error. An empty sequence still means "no filename column".
        has_file_names = file_names is not None and len(file_names) > 0

        names = FeatureDescriptor.get_feature_names()
        descs = FeatureDescriptor.get_feature_descriptions()

        # Write header
        header = ['class']
        if has_file_names:
            header.append('filename')
        header.extend('{}'.format(name) for name in names)
        csvfile.write(','.join(header) + '\n')

        # The type of a column does not depend on the row, so look it up
        # once; skipped entirely when there are no rows to write.
        n_rows = X.shape[0]
        if n_rows:
            feat_types = [descs[names[j]]['type']
                          for j in range(X.shape[1])]
        else:
            feat_types = []

        # Write data, one complete line per write
        for i in range(n_rows):
            row = ['TRUE' if bool(y[i]) else 'FALSE']
            if has_file_names:
                row.append('{}'.format(file_names[i]))
            for j, feat_type in enumerate(feat_types):
                feat_val = X[i, j]
                if feat_type == bool:
                    feat_val = 'TRUE' if feat_val >= 0.5 else 'FALSE'
                elif feat_type == int:
                    feat_val = int(round(feat_val))
                row.append('{}'.format(feat_val))
            csvfile.write(','.join(row) + '\n')
    finally:
        # Only close what this function opened; a caller-supplied file
        # object stays under the caller's control.
        if we_opened_csvfile:
            csvfile.close()
Implementation of the mimicry attack. Created on July 1, 2013. ''' import os import random import sys import numpy from mimicus.tools.featureedit import FeatureEdit from mimicus.tools.datasets import numpy2csv, csv2numpy from mimicus.tools.featureedit import FeatureDescriptor descs = FeatureDescriptor.get_feature_descriptions() names = FeatureDescriptor.get_feature_names() def binarize(X_old): X_new = numpy.copy(X_old) for i in range(0, X_new.shape[0]): for j in range(0, X_new.shape[1]): feat_type = descs[names[j]]['type'] if feat_type != bool: if X_new[i][j] != 0: X_new[i][j] = 1 return X_new def validate(X_old):