Ejemplo n.º 1
0
def get_FTC_mimicry():
    '''
    Loads the feature vectors of all mimicry attack results in the FTC
    scenario.

    The attack result files are looked up at the location stored in the
    ('results', 'FTC_mimicry') config entry. If none are found, they are
    generated first with attack_mimicry('FTC') and the lookup is repeated.

    Returns a tuple (X, y):
    X - numpy.array of size (number of samples, number of features)
        holding one feature vector per attack result file
    y - list with one label per sample; every label is 1.0
    '''
    pdfs = utility.get_pdfs(config.get('results', 'FTC_mimicry'))
    if not pdfs:
        # Nothing on disk yet: generate the attack files, then look again
        attack_mimicry('FTC')
        pdfs = utility.get_pdfs(config.get('results', 'FTC_mimicry'))

    # The parenthesized single-argument form prints the same text whether
    # print is a statement (Python 2) or a function (Python 3)
    print('Loading feature vectors from mimicry attack results...')
    results = numpy.zeros((len(pdfs), FeatureDescriptor.get_feature_count()))
    for i, pdf in enumerate(pdfs):
        results[i] = FeatureEdit(pdf).retrieve_feature_vector_numpy()

    return results, [1.0] * len(pdfs)
Ejemplo n.º 2
0
def get_FTC_mimicry():
    '''
    Returns the feature values of all mimicry attack results in the FTC
    scenario together with their labels.

    Attack result files are fetched from the location given by the
    ('results', 'FTC_mimicry') config entry; when there are none, they
    are created by calling attack_mimicry('FTC') and fetched again.

    Returns a tuple (X, y):
    X - numpy.array of size (number of samples, number of features),
        one row per attack result file
    y - list of labels, one per sample, all equal to 1.0
    '''
    pdfs = utility.get_pdfs(config.get('results', 'FTC_mimicry'))
    if not pdfs:
        # Generate the attack files and repeat the lookup
        attack_mimicry('FTC')
        pdfs = utility.get_pdfs(config.get('results', 'FTC_mimicry'))

    # Written with parentheses so the line is valid both as a Python 2
    # print statement and as a Python 3 print call
    print('Loading feature vectors from mimicry attack results...')
    sample_count = len(pdfs)
    results = numpy.zeros(
        (sample_count, FeatureDescriptor.get_feature_count()))
    for row_index, pdf in enumerate(pdfs):
        results[row_index] = FeatureEdit(pdf).retrieve_feature_vector_numpy()

    return results, [1.0] * sample_count
Ejemplo n.º 3
0
def csv2numpy(csv_in):
    '''
    Parses a CSV input file and returns a tuple (X, y, file_names) with
    training vectors (numpy.array), labels (numpy.array) and the values
    of the second column (list), respectively.

    csv_in - name of a CSV file with training data points;
                the first column in the file is supposed to be named
                'class' and should contain the class label ('TRUE' or
                'FALSE') for the data points; the second column is not
                used as a feature (put data point ID here) and is
                returned in file_names; the remaining columns hold the
                feature values, where 'TRUE'/'FALSE' are read as 1/0.

    Rows that are empty or do not begin with a class label (such as the
    header row) are skipped.
    '''
    # NOTE: binary mode is what the Python 2 csv module expects; under
    # Python 3 csv.reader needs a text-mode file opened with newline=''.
    # The with-block makes sure the file is closed once it has been read.
    with open(csv_in, 'rb') as csv_file:
        csv_rows = list(csv.reader(csv_file))
    classes = {'FALSE': 0, 'TRUE': 1}
    # Keep only data points: non-empty rows starting with a class label
    data_rows = [row for row in csv_rows if row and row[0] in classes]
    # X = vector of data points, y = label vector
    X = numpy.zeros((len(data_rows), FeatureDescriptor.get_feature_count()),
                    dtype=numpy.float64,
                    order='C')
    y = numpy.zeros(len(data_rows), dtype=numpy.float64)
    file_names = []
    for rownum, row in enumerate(data_rows):
        # Read class label from first column
        y[rownum] = classes[row[0]]
        file_names.append(row[1])
        for featnum, featval in enumerate(row[2:]):
            # Convert booleans to integers; other values are parsed as-is
            X[rownum, featnum] = float(classes.get(featval, featval))
    return X, y, file_names