Esempio n. 1
0
def main(src, dst, percent):
    """Copy a random sample of the files found under ``src`` into ``dst``.

    ``scan_data(src)`` is iterated as pairs and the second element of each
    pair is used as a file path. The paths are shuffled, the first
    ``int(percent * total)`` of them are selected, and each selected file is
    copied to the same location with its leading ``src`` swapped for ``dst``.
    Missing destination directories are created on the way.

    Args:
        src: Source root handed to ``scan_data``. Presumably every scanned
            path starts with this string -- confirm against ``scan_data``.
        dst: Destination root that replaces ``src`` in each copied path.
        percent: Despite the name, this is used as a fraction: it is
            multiplied directly by the number of files (0.1 selects about
            10% of them, 1 selects all).
    """
    paths = [path for _, path in scan_data(src)]
    tot = len(paths)

    # Shuffle in place so that the leading slice below is a random sample.
    shuffle(paths)
    selected = paths[:int(percent * tot)]

    for p in selected:
        if p.startswith(src):
            # Swap only the leading root. A plain str.replace(src, dst) would
            # also rewrite any later occurrence of src inside the path
            # (e.g. a file or sub-directory that happens to share its name).
            dst_p = dst + p[len(src):]
        else:
            # Path is not literally rooted at src; rewrite at most the first
            # occurrence instead of every one.
            dst_p = p.replace(src, dst, 1)

        # os.path.dirname copes with paths that have no separator at all and
        # with alternative separators, where rindex(os.path.sep) would raise
        # or cut at the wrong place.
        parent = os.path.dirname(dst_p)
        if parent:
            os.makedirs(parent, exist_ok=True)
        copy(p, dst_p)

    print("Copied %d files of %d" % (len(selected), tot))
Esempio n. 2
0
# Topic      : USC CSCI 544 Applied NLP Fall 16 - HW2
#             - Perceptron Classifier for Spam-Ham classification
# Author     : Thamme Gowda Narayanaswamy
# Student ID : 2074-6694-39
# Email      : [email protected]
# Date       : Oct 12, 2016

import sys
import os
from perceptron import AvgPerceptron
from perceptron import scan_data

if __name__ == '__main__':
    # Script entry point: the first command-line argument is the training
    # directory, which scan_data() turns into the list of examples.
    examples = [*scan_data(sys.argv[1])]
    print("Found %d total examples" % (len(examples),))

    # Fit the classifier on the examples. The 30 is passed as the second
    # argument to learn() -- presumably the number of training passes;
    # confirm in perceptron.py.
    classifier = AvgPerceptron()
    classifier.learn(examples, 30)

    # Persist the trained model through save() under 'per_model.txt'.
    classifier.save('per_model.txt')
Esempio n. 3
0
# Topic      : USC CSCI 544 Applied NLP Fall 16 - HW2
#             - Perceptron Classifier for Spam-Ham classification
# Author     : Thamme Gowda Narayanaswamy
# Student ID : 2074-6694-39
# Email      : [email protected]
# Date       : Oct 12, 2016

import sys
import os
from perceptron import StdPerceptron, scan_data

if __name__ == '__main__':
    # Script entry point: the first command-line argument is the training
    # directory, which scan_data() turns into the list of examples.
    examples = [*scan_data(sys.argv[1])]
    print("Found %d total examples" % (len(examples),))

    # Fit the classifier on the examples. The 20 is passed as the second
    # argument to learn() -- presumably the number of training passes;
    # confirm in perceptron.py.
    classifier = StdPerceptron()
    classifier.learn(examples, 20)

    # Persist the trained model through save() under 'per_model.txt'.
    classifier.save('per_model.txt')