def main(src, dst, percent):
    """Copy a random sample of the files found under ``src`` into ``dst``.

    Every path reported by ``scan_data(src)`` is collected and shuffled, and
    the leading ``int(percent * total)`` entries are copied. Each destination
    path is the source path with its ``src`` root swapped for ``dst``; missing
    destination directories are created on the way.

    Args:
        src: Root directory handed to ``scan_data``; also the path prefix
            that gets replaced by ``dst``.
        dst: Root directory that receives the copies.
        percent: Fraction of the files to copy. It is multiplied by the total
            file count, so ``0.25`` selects a quarter of the files. Values
            outside ``[0, 1]`` are clamped to "nothing" / "everything".

    Prints a one-line summary with the number of files copied and the total.
    """
    # scan_data yields pairs; only the second item (the file path) is used.
    paths = [path for _, path in scan_data(src)]
    tot = len(paths)

    # Shuffled in place so that the leading slice below is a random sample.
    # NOTE(review): presumably random.shuffle -- confirm against the imports.
    shuffle(paths)

    # Clamp the sample size: a negative fraction would otherwise become a
    # negative slice bound and silently copy "all but the last k" files.
    limit = max(0, min(tot, int(percent * tot)))

    count = 0
    for p in paths[:limit]:
        # Replace only the first occurrence of src (the root of the path).
        # An unbounded replace would also rewrite sub-directory or file names
        # that happen to contain the same text.
        dst_p = p.replace(src, dst, 1)
        # dirname copes with paths that contain no separator, where
        # str.rindex would raise ValueError.
        parent = os.path.dirname(dst_p)
        if parent:
            os.makedirs(parent, exist_ok=True)
        copy(p, dst_p)
        count += 1
    print("Copied %d files of %d" % (count, tot))
# Topic : USC CSCI 544 Applied NLP Fall 16 - HW2
# - Perceptron Classifier for Spam-Ham classification
# Author : Thamme Gowda Narayanaswamy
# Student ID : 2074-6694-39
# Email : [email protected]
# Date : Oct 12, 2016
#
# Usage: <this script> <train_dir>
# Trains an AvgPerceptron on the examples found under <train_dir> and saves
# the resulting model to 'per_model.txt' in the current working directory.
import sys
import os
from perceptron import AvgPerceptron
from perceptron import scan_data

if __name__ == '__main__':
    # Fail with a readable usage line instead of a bare IndexError traceback
    # when the training directory argument is missing (exit status stays 1).
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <train_dir>" % sys.argv[0])
    train_dir = sys.argv[1]

    # Materialise the scan so the examples can be counted before training.
    data = list(scan_data(train_dir))
    print("Found %d total examples" % len(data))

    model = AvgPerceptron()
    # NOTE(review): 30 is presumably the number of training passes --
    # confirm against AvgPerceptron.learn in perceptron.py.
    model.learn(data, 30)
    model.save('per_model.txt')
# Topic : USC CSCI 544 Applied NLP Fall 16 - HW2
# - Perceptron Classifier for Spam-Ham classification
# Author : Thamme Gowda Narayanaswamy
# Student ID : 2074-6694-39
# Email : [email protected]
# Date : Oct 12, 2016
#
# Usage: <this script> <train_dir>
# Trains a StdPerceptron on the examples found under <train_dir> and saves
# the resulting model to 'per_model.txt' in the current working directory.
import sys
import os
from perceptron import StdPerceptron, scan_data

if __name__ == '__main__':
    # Fail with a readable usage line instead of a bare IndexError traceback
    # when the training directory argument is missing (exit status stays 1).
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <train_dir>" % sys.argv[0])
    train_dir = sys.argv[1]

    # Materialise the scan so the examples can be counted before training.
    data = list(scan_data(train_dir))
    print("Found %d total examples" % len(data))

    model = StdPerceptron()
    # NOTE(review): 20 is presumably the number of training passes --
    # confirm against StdPerceptron.learn in perceptron.py.
    model.learn(data, 20)
    model.save('per_model.txt')