def test_fetch_text_file_with_correct_data(self):
    """fetch_text_contents returns a dict keyed by record id for a valid 2-row file."""
    # Arrange
    file_name = "testdata\\test_data_2_rows.txt"
    line1 = {'gender': 'F', 'age': '21', 'sales': '001', 'bmi': 'Normal',
             'salary': '12', 'birthday': '01/01/1996', 'valid': '1'}
    line2 = {'gender': 'M', 'age': '45', 'sales': '999', 'bmi': 'Underweight',
             'salary': '725', 'birthday': '31/12/1971', 'valid': '1'}
    expected_result = {'A001': line1, 'Q001': line2}
    file = FileReader.fetch_text_file(file_name)

    # Act
    result = FileReader.fetch_text_contents(file, "", ",")

    # Assert: assertEqual gives a readable diff on failure,
    # unlike assertTrue(a == b)
    self.assertEqual(result, expected_result)
def test_write_to_database_with_display_data(self):
    """write_to_database echoes each stored row when the user answers 'Y'."""
    # Arrange
    test_name = "Write to Database with displaying data Test #02"
    insert = "Y"  # answer "yes" so the saved data is displayed
    expected_result = "['A001', 'F,', '21,', '001,', 'Normal,', '12,', '01/01/1996,', '1']"
    data_to_test = {
        'A001': {'gender': 'F', 'age': '21', 'sales': '001', 'bmi': 'Normal',
                 'salary': '12', 'birthday': '01/01/1996', 'valid': '1'},
        'Q001': {'gender': 'M', 'age': '45', 'sales': '999', 'bmi': 'Underweight',
                 'salary': '725', 'birthday': '31/12/1971', 'valid': '1'},
        'A002': {'gender': 'F', 'age': '21', 'sales': '001', 'bmi': 'Normal',
                 'salary': '12', 'birthday': '01/01/1996', 'valid': '1'},
        'A05': {'gender': 'F', 'age': '21', 'sales': '001', 'bmi': 'Normal',
                'salary': '12', 'birthday': '01/01/1996', 'valid': '0'}}
    class_to_test = FileReader()

    # Act: capture stdout; restore it in a finally block so an exception in
    # write_to_database cannot leave sys.stdout redirected for later tests
    # (the original swap had no try/finally).
    cmd_output = io.StringIO()
    sys.stdout = cmd_output
    try:
        with patch('builtins.input', side_effect=insert):
            result = class_to_test.write_to_database(data_to_test)
    finally:
        sys.stdout = sys.__stdout__

    # Assert
    try:
        self.assertTrue(expected_result in cmd_output.getvalue())
    except AssertionError:
        print("{} Failed - Should be {}, but was {}.".format(
            test_name, expected_result, result))
    else:
        print("{} Passed".format(test_name))
def _default(self):
    """Run FileReader.call_file with the user's separator, or "," when unset."""
    separator = self.user_string if self.user_string else ","
    reader = FileReader()
    FileReader.call_file(reader, self.detail_mode, separator)
def main(argv):
    """Parse -t/--train and -e/--test, train logistic regression, run the test."""
    setpath()
    try:
        opts, args = getopt.getopt(argv, "ht:e:", ["train=", "test="])
        if len(sys.argv) < 5:
            raise getopt.GetoptError(None)
    except getopt.GetoptError:
        # Python 3 print function (the old statement form is a SyntaxError on 3.x)
        print('\nusage: run.py -t <trainfile> -e <testfile> \n')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    from file_reader import FileReader  # single import (original imported twice)
    fr = FileReader(trainfile)
    training_Set = fr.getRows()

    # Read in the test file and create the matrix
    test_File_Reader = FileReader(testfile)
    testing_Set = test_File_Reader.getRows()
    test_Result(logistic_Regression(training_Set), testing_Set)
def main(argv):
    """Parse -t/--train and -e/--test, train Naive Bayes, score the test file."""
    setpath()
    try:
        opts, args = getopt.getopt(argv, "ht:e:", ["train=", "test="])
        if len(sys.argv) < 5:
            raise getopt.GetoptError(None)
    except getopt.GetoptError:
        print('\nusage: run.py -t <trainfile> -e <testfile>\n')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    from file_reader import FileReader
    from naive_bayes import NaiveBayes

    # (removed the unused `fr = FileReader(testfile)` — it was never read and
    # was misleadingly constructed from the *test* file)
    nb = NaiveBayes(trainfile)
    test_file_reader = FileReader(testfile)
    testData = test_file_reader.getRows()

    num_errors = 0
    true_positive = 0
    false_positive = 0
    true_negative = 0
    false_negative = 0

    # Testing phase: the last column of each row is the gold label.
    for idx, row in enumerate(testData):
        prediction = nb.binary_classify(row)
        if row[-1] != prediction:
            num_errors += 1.0
            print("Error on row: %s" % str(idx + 1))
            if row[-1] == '1':
                false_negative += 1
            else:
                false_positive += 1
        elif row[-1] == '0':
            true_negative += 1
        else:
            true_positive += 1

    print('\n\n--------------Error Count----------------')
    print(num_errors)
    print('\n\n--------------Accuracy----------------')
    print("\n\nThe Accuracy is " +
          str((len(testData) - num_errors) * 100 / len(testData)) + "%")
    print("\n===========The confusion matrix===========")
    print("\t No \t Yes")
    print("No \t", str(true_negative) + "\t", str(false_positive))
    print("Yes \t", str(false_negative) + "\t", str(true_positive))
def main(argv):
    """Parse -t/--train and -e/--test, train Naive Bayes, score the test file."""
    setpath()
    try:
        opts, args = getopt.getopt(argv, "ht:e:", ["train=", "test="])
        if len(sys.argv) < 5:
            raise getopt.GetoptError(None)
    except getopt.GetoptError:
        print('\nusage: run.py -t <trainfile> -e <testfile>\n')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    from file_reader import FileReader
    from naive_bayes import NaiveBayes

    # (removed the unused `fr = FileReader(testfile)` — it was never read and
    # was misleadingly constructed from the *test* file)
    nb = NaiveBayes(trainfile)
    test_file_reader = FileReader(testfile)
    testData = test_file_reader.getRows()

    num_errors = 0
    true_positive = 0
    false_positive = 0
    true_negative = 0
    false_negative = 0

    # Testing phase: the last column of each row is the gold label.
    for idx, row in enumerate(testData):
        prediction = nb.binary_classify(row)
        if row[-1] != prediction:
            num_errors += 1.0
            print("Error on row: %s" % str(idx+1))
            if row[-1] == '1':
                false_negative += 1
            else:
                false_positive += 1
        elif row[-1] == '0':
            true_negative += 1
        else:
            true_positive += 1

    print('\n\n--------------Error Count----------------')
    print(num_errors)
    print('\n\n--------------Accuracy----------------')
    print("\n\nThe Accuracy is " +
          str((len(testData) - num_errors)*100/len(testData)) + "%")
    print("\n===========The confusion matrix===========")
    print("\t No \t Yes")
    print("No \t", str(true_negative) + "\t", str(false_positive))
    print("Yes \t", str(false_negative) + "\t", str(true_positive))
def main():
    """Read rooms and students, distribute students, export the result as JSON."""
    arg_parser = ArgumentParser()
    rooms_file, students_file = arg_parser.args_info()
    reader = FileReader()
    rooms = reader.file_reader(rooms_file)
    students = reader.file_reader(students_file)
    distributed = Distribution(rooms, students).student_distribution()
    print(JsonExporter(distributed).unloading())
def main():
    """Build the simulation system, load its input file, and launch the Qt GUI."""
    system = System(0.02)  # fixed time step for the simulation
    FileReader(system).read_file()
    global app
    app = QApplication(sys.argv)
    # Keep a reference so the GUI object is not garbage collected.
    gui = GUI(system)
    sys.exit(app.exec_())
class PackingSystem:
    """Splits a source file into fixed-size, numbered SWPacket data packets."""

    DEFAULT_PACKET_DATA_SIZE = 4096
    PACKET_HEADER_SIZE = 4

    def __init__(self):
        self.__data_size_in_bytes = PackingSystem.DEFAULT_PACKET_DATA_SIZE
        self.__header_size_in_bytes = PackingSystem.PACKET_HEADER_SIZE
        self.__packet_size_in_bytes = (PackingSystem.DEFAULT_PACKET_DATA_SIZE
                                       + PackingSystem.PACKET_HEADER_SIZE)
        self.__packet_number = 1

    def __new_data_packet(self):
        # All packets share the same geometry; only number and payload differ.
        return SWPacket(self.__packet_size_in_bytes,
                        self.__data_size_in_bytes,
                        self.__header_size_in_bytes,
                        packet_type=PacketType.DATA)

    def pack_data(self):
        """Read the next chunk from the open file and wrap it in a numbered packet."""
        packet = self.__new_data_packet()
        packet.set_packet_number(self.__packet_number)
        self.__packet_number += 1
        packet.store_data(self.__file_reader.read())
        return packet

    def open_file(self, source_file):
        """Attach a FileReader to *source_file* and open it for chunked reads."""
        self.__file_reader = FileReader(source_file, self.__data_size_in_bytes)
        self.__file_reader.open()

    def close_file(self):
        """Close the underlying file reader."""
        self.__file_reader.close()

    def get_file_size(self):
        """Size of the currently open source file, in bytes."""
        return self.__file_reader.get_file_size_in_bytes()

    def get_current_packet_number(self):
        """Number that will be assigned to the next data packet."""
        return self.__packet_number

    def get_data_size_in_bytes(self):
        """Payload capacity of each packet, in bytes."""
        return self.__data_size_in_bytes

    def get_end_file_packet(self):
        """Build the end-of-file marker packet (does not advance the counter)."""
        packet = self.__new_data_packet()
        packet.make_end_packet()
        packet.set_packet_number(self.__packet_number)
        return packet

    def reset(self):
        """Restart packet numbering from 1."""
        self.__packet_number = 1

    def set_data_size(self, new_size):
        """Change the payload size; the packet size tracks it plus the header."""
        self.__data_size_in_bytes = new_size
        self.__packet_size_in_bytes = new_size + PackingSystem.PACKET_HEADER_SIZE
def test_fetch_text_file_not_found_returns_nothing(self):
    """fetch_text_contents yields None when the source file does not exist."""
    # Arrange
    file_name = "testdata\\file_does_not_exist.txt"
    file = FileReader.fetch_text_file(file_name)

    # Act
    result = FileReader.fetch_text_contents(file, "", ",")

    # Assert: assertIsNone reports the actual value on failure,
    # unlike assertTrue(result == None)
    self.assertIsNone(result)
def retrieve_best_k_related_reviews(K, query):
    """Display the K reviews most relevant to *query* under the Rocchio model.

    :param K: int, number of related documents desired
    :param query: string, the query to check the reviews against
    :return: None — results are displayed by retrieve()
    """
    file_name = "./dataset/amazon_cells_labelled_full.txt"
    data = FileReader(file_name, True, True)
    tfidf_set, text_set = data.build_set('tfidf', file_name)
    retrieve(K, data.parse_query(query), tfidf_set, text_set)
def append_root(self, root):
    """Register a root directory and index every file found beneath it."""
    data = self.read()

    # Skip when the directory is already registered, or is a sub-path of an
    # existing root; otherwise record the new root.
    for existing in data['Root']:
        if existing == root or existing in root:
            return
    data['Root'].append(root)

    # Progress bar for the (potentially long) directory scan.
    progress_bar = ProgressBar('Code Files')

    file_reader = FileReader()
    file_list = file_reader.walk_folder(root)
    total_number = len(file_list)
    for index, file in enumerate(file_list):
        # Identification code for the file.
        file_code = file_reader.code_file(file)
        # File name and extension.
        file_name, file_extension = os.path.splitext(os.path.basename(file))
        # Human-readable size; the code's last '-' field is the byte count.
        size_mb = round(int(file_code.split('-')[-1]) / 1024 / 1024)
        if size_mb < 1024:
            file_size = str(size_mb) + ' MB'
        else:
            file_size = str(round(size_mb / 1024, 2)) + ' GB'
        # Only add files that are not indexed yet.
        if file not in data['File']:
            data['File'][file] = {
                'code': file_code,
                'extension': file_extension,
                'name': file_name,
                'path': file,
                'size': file_size,
            }
        # Update the progress display and keep the Qt UI responsive.
        progress_bar.set_value(index + 1, total_number)
        QApplication.processEvents()

    progress_bar.close()
    # Persist the updated data.
    self.write(data)
def test_05(self):
    """Reading a plant_uml file should register at least one class."""
    reader = FileReader()
    reader.add_file("plant_uml")
    reader.read_file()
    reader.find_classes()
    # The original body only executed `pass` inside an if, so the test could
    # never fail regardless of the outcome; assert the condition instead.
    self.assertGreater(len(reader.all_my_classes), 0)
def test_five_hyphens_at_start_ends_read(self, mocked_open):
    """read_file stops consuming input at a line starting with '-----'.

    Only the two lines before the '-----' marker should land in list_lines.
    """
    mocked_open.side_effect = [
        mock.mock_open(
            read_data="CR\rCRLF\r\n-----LF\nNo end of file").return_value
    ]
    path = "Path to File"
    reader = FileReader()
    reader.read_file(path)
    mocked_open.assert_called_with(path, "r")
    self.assertEqual(2, len(reader.list_lines),
                     "Should be 2 items in the list")
def __init__(self, filename):
    """Load the YAML config at *filename* and extract instruction/video/name data."""
    # Use a context manager so the file handle is closed even if parsing
    # fails (the original leaked the open file object).
    with open(filename, 'r') as config_file:
        self._text = yaml.safe_load(config_file)
    data = FileReader(self._text)
    data.read()
    self._original_instructions = data.get_original_instructions()
    self._special_instructions = data.get_special_instructions()
    self._video_list = data.get_video_list()
    self._names = data.get_names()
def test_hash_at_start_skips_line(self, mocked_open):
    """read_file ignores comment lines beginning with '#'.

    Two of the four input lines are comments, so list_lines keeps two.
    """
    mocked_open.side_effect = [
        mock.mock_open(
            read_data="#CR\rCRLF\r\n# LF\nNo end of file").return_value
    ]
    path = "Path to File"
    reader = FileReader()
    reader.read_file(path)
    mocked_open.assert_called_with(path, "r")
    self.assertEqual(2, len(reader.list_lines),
                     "Should be 2 items in the list")
def validate(file):
    """Run ReportErrors over a parsed file and collect its findings.

    Returns (file, fdict, errors, delimiter, headers_fix).
    """
    fdict = FileReader(file).fdict
    validator = ReportErrors(fdict, file, h1000lk)
    errors = validator.run()
    headers_fix = validator.set_to_fix()
    return file, fdict, errors, fdict['delimiter'], headers_fix
class Controller:
    """Dispatches input files to the shared FileReader instance."""

    # NOTE(review): class-level attribute — every Controller instance shares
    # this single FileReader; confirm that is intentional.
    file = FileReader()

    def load_file(self, infile, temp_file=''):
        """Load a .txt or .csv file, then parse and print its classes.

        Unsupported extensions raise (and immediately handle) NameError.
        The .txt and .csv branches previously duplicated the parse/print
        pipeline; it is factored out below.
        """
        try:
            if ".txt" in infile[-4:]:
                self.file.add_file(infile)
            elif ".csv" in infile[-4:]:
                self.file.read_csv_file(infile, temp_file)
            else:
                raise NameError("incorrect file format, please see help load")
            # Common pipeline for both supported formats.
            self.file.read_txt_file()
            self.file.find_classes()
            self.file.printProgram()
        except NameError as e:
            print(e)
        except FileNotFoundError:
            print("File not found")
        except Exception as e:
            print(e)
def __init__(self, file_name):
    """Parse each input line ("direction amount") into a Command object."""
    raw_lines = FileReader.to_list("../Day2/" + file_name)
    # Each line's first field is the direction, second is the integer amount.
    self.commands = [
        Command(fields[0], int(fields[1]))
        for fields in (line.split(" ") for line in raw_lines)
    ]
def metadata_main() -> None:
    """Transform metadata and push it to the Nomis metadata API."""
    logger.info(f"Commencing metadata transformation service.")
    with FileReader(args.filename) as fr:
        file_data = fr.load_json()

    # Choose the transformer by the requested source format.
    metadata_format = args.metadata_format.lower()
    if metadata_format == 'c':
        uuids_metadata = cantabular_metadata(file_data)
    elif metadata_format == 'o':
        uuids_metadata = ons_metadata(file_data)
    else:
        raise ValueError("Unrecognised metadata format.")

    # Guard clause: nothing to push.
    if not len(uuids_metadata) > 0:
        logger.info("No metadata appended.")
        return

    variable_metadata_requests = DatasetTransformations.variable_metadata_request(
        uuids_metadata)
    with NomisMetadataApiConnector(
            config.get_credentials('nomis_metadata'),
            config.get_client('nomis_metadata')) as metadata_connector:
        uuids = metadata_connector.add_new_metadata(
            variable_metadata_requests, return_uuids=True)
        logger.info(
            f"METADATA TRANSFORMATION SUCCESS. "
            f"Metadata was created for entities with the following UUIDS: {uuids}"
        )
def save_data(self):
    """Append the current form fields to data_string and write the save file.

    Blank entries in the list below become the blank separator lines of the
    original record layout.
    """
    fields = [
        self.center_name.text(),
        self.center_location.text(),
        self.center_velocity.text(),
        self.center_acceleration.text(),
        self.center_mass.text(),
        self.center_density.text(),
        "",
        "",
        self.collision.text(),
        self.frame_rate.text(),
        self.time_step.text(),
        self.simulation_speed.text(),
        self.time_span.text(),
        "",
        "----------------------------------",
    ]
    self.data_string += "\n".join(fields) + "\n"
    path = 'files/save.txt'
    FileReader.write_file(self, path)
def validate(self, file, id):
    """Split the records of *file* into clean and invalid data sets.

    :param file: source passed straight through to FileReader.read_file
    :param id: "file" to get the invalid rows prepended to the clean ones,
               "db" to get only the clean rows
    :return: list of record dicts (see NOTE below for other id values)
    """
    self.invalid_data = []
    self.clean_data_sets = []
    file_reader = FileReader()
    file_data = file_reader.read_file(file)
    for data_dic in file_data:
        # data_validate sorts each record into invalid_data / clean_data_sets.
        self.data_validate(data_dic)
    if self.invalid_data:
        if id == "file":
            return self.invalid_data + self.clean_data_sets
        if id == "db":
            return self.clean_data_sets
        # NOTE(review): falls through and returns None for any other id when
        # invalid rows exist — confirm this is intended.
    else:
        return self.clean_data_sets
def main():
    """Wire the CLI arguments into the file reader, calculator and display."""
    parser = argparse.ArgumentParser()
    parser.add_argument("path")
    parser.add_argument("dialog")
    parser.add_argument("log")
    args = parser.parse_args()

    args_handler = ArgsHandler()
    path_dialog = args_handler.get_path_from_args(
        str(args.path), "Veuillez saisir le bon chemin du fichier : \n")
    dialog = args_handler.check_input_response(
        str(args.dialog), "Souhaitez-vous un dialogue (oui/non) ? \n")
    log = args_handler.check_input_response(
        str(args.log),
        'Souhaitez-vous enregistrer les informations (oui/non) ? \n')

    file_reader = FileReader(path_dialog)
    calcul = Calcul(file_reader)
    display = Display()
    if dialog == 'non':
        display.no_dialog(calcul, log, file_reader)
    elif dialog == 'oui':
        display.yes_dialog(calcul, log, file_reader)
def calculate(file_path, file_out_path):
    """Parse *file_path* and export it to Excel under a random document name.

    Returns the generated (hex UUID) document name.
    """
    file_name = uuid.uuid4().hex
    with open(file_path) as file:
        text = file.readlines()
    calculating_data = FileReader(text, os.path.join(BASE_DIR, 'columns.json'))
    ExcelParser(calculating_data, file_out_path).export_document(
        file_name, calculating_data.columns)
    return file_name
def test_file_is_open_and_read(self, mocked_open):
    """read_file splits on CR, CRLF and LF and keeps the unterminated tail."""
    mocked_open.side_effect = [
        mock.mock_open(read_data="Category\tName\tRANKED\tG1\tG2\r"
                                 "CRLF\r\n"
                                 "LF\n"
                                 "No end of file").return_value
    ]
    path = "Path to File"
    reader = FileReader()
    reader.read_file(path)
    mocked_open.assert_called_with(path, "r")
    self.assertEqual(4, len(reader.list_lines),
                     "Should be 4 items in the list")
def unit_test_five(my_file):
    """Check that FileReader registers at least one class from *my_file*."""
    print("Run Test 5 - File Reader adds class")
    reader = FileReader()
    reader.add_file(my_file)
    reader.read_file()
    reader.find_classes()
    if len(reader.all_my_classes) > 0:
        print("Class added from file")
    else:
        print("ERROR - class not added")
def main():
    """Interactive driver: pick an instance type and file, then evaluate methods."""
    print("Selecione o tipo de instância:")
    print("1 - Large Scale")
    print("2 - Low Dimensional")
    instance_type_option = input()
    while instance_type_option not in VALID_INSTANCE_OPTIONS:
        print("Opção inválida, digite outra")
        instance_type_option = input()

    file_names = FileReader.get_file_names(
        path=INSTANCE_OPTIONS_FOLDER_NAMES.get(instance_type_option))
    # enumerate avoids the O(n^2) list.index() call per iteration (and is
    # correct even when a file name appears more than once).
    for index, file_name in enumerate(file_names):
        print(f"{index} - {file_name}")

    print("Selecione uma instância:")
    instance_option = input()
    while not validate_instance_option(instance_option, file_names):
        print("Opção inválida, digite outra")
        instance_option = input()

    chosen_name = file_names[int(instance_option)]
    instance_reader = FileReader(
        path=INSTANCE_OPTIONS_FOLDER_NAMES.get(instance_type_option),
        file_name=chosen_name)
    solution_reader = FileReader(
        path=INSTANCE_OPTIONS_SOLUTION_FOLDER_NAMES.get(instance_type_option),
        file_name=chosen_name)
    optimum_value = solution_reader.parse_solution_data()
    instance_dict = instance_reader.parse_instance_data()
    evaluate_methods(optimum_value, instance_dict, f"{chosen_name}")
def learn():
    """Cluster training products with SVD + K-Means, then predict the test set."""
    # splits()
    print("Done Splitting...")
    tester = Tester()
    test_set = tester.getTestSet()
    print("Finished tester Stuff")
    answers = []
    reader = FileReader("training.txt")
    # presumably a (sparse) product/term matrix — TODO confirm
    X = reader.read_file()
    print("Starting SVD..")
    # Reduce to 10 latent dimensions before clustering.
    svd = TruncatedSVD(n_components=10, n_iter=10, random_state=42)
    dense = svd.fit_transform(X)
    print("Done with SVD, starting K Means...")
    km = KMeans(n_clusters=100)
    ans = km.fit_predict(dense)
    print("Done with K Means...")
    # Invert the assignment: cluster id -> list of product keys in it.
    inverseAns = {cluster: [] for cluster in range(100)}
    # centroids = svd.inverse_transform(km.cluster_centers_)
    for trainingProdKey, trainingProdIndex in reader.product.items():
        inverseAns[ans[trainingProdIndex]].append(trainingProdKey)
    print('Done inverting clusters')
    i = 0
    for prod in test_set:
        # print("Inside Loop")
        answers.append(predict(prod, reader.product, ans, inverseAns))
        # NOTE(review): len(test_set) // 100 is 0 for fewer than 100 items,
        # which would raise ZeroDivisionError here — confirm inputs are large.
        if i % (len(test_set) // 100) == 0:
            print("\rDone with {}% of predicting...".format(i / len(test_set)),
                  end='')
        i = i + 1
    print()
    print(tester.checkAnswers(answers))
def test_convert_file_lines_to_puzzle_with_blank_squares(self):
    """'_' cells become Squares with number None; digits keep their values."""
    lines_of_file = [
        '_ 2 3 _\n',
        '2 _ 4 1\n',
        '3 4 _ 2\n',
        '4 1 _ _\n'
    ]
    expected = [
        [Square(None, 1, 1, 1), Square(2, 1, 2, 1),
         Square(3, 1, 3, 2), Square(None, 1, 4, 2)],
        [Square(2, 2, 1, 1), Square(None, 2, 2, 1),
         Square(4, 2, 3, 2), Square(1, 2, 4, 2)],
        [Square(3, 3, 1, 3), Square(4, 3, 2, 3),
         Square(None, 3, 3, 4), Square(2, 3, 4, 4)],
        [Square(4, 4, 1, 3), Square(1, 4, 2, 3),
         Square(None, 4, 3, 4), Square(None, 4, 4, 4)],
    ]
    file_reader = FileReader()
    actual = file_reader._FileReader__convert_file_lines_to_puzzle(lines_of_file)
    # Compare every attribute of every square pairwise over the 4x4 grid.
    for i in range(4):
        for j in range(4):
            want, got = expected[i][j], actual[i][j]
            self.assertEqual(want.number, got.number)
            self.assertEqual(want.row, got.row)
            self.assertEqual(want.column, got.column)
            self.assertEqual(want.block, got.block)
class IndexReader(object):
    """Reads an EDGAR quarterly company index into a per-company dictionary."""

    def __init__(self, year=YEAR, quarter=QUARTER):
        self.year = year
        self.quarter = quarter
        self.fr = FileReader()
        # self.download_index_file()
        self.company_dictionary = self.build_dictionary()

    def download_index_file(self):
        """Download company.idx for this year/quarter into files/index/."""
        index_name = 'edgar/full-index/{0}/{1}/company.idx'.format(
            self.year, self.quarter)
        local_file = 'files/index/'
        self.fr.download(index_name, local_file)

    def build_dictionary(self):
        """Parse company.idx into {company: [{type, cik, date_filed, file_name}]}.

        Only 10-Q, 8-K and 10-K filings are kept.
        """
        company_dictionary = {}
        # Open in text mode and via a context manager: the parsing uses
        # str.split, which fails on bytes under Python 3 (the original used
        # 'rb' plus Python 2 print/iteritems), and the handle was never closed.
        with open('files/index/company.idx', 'r') as f:
            line_num = 1
            for line in f:
                # Skip the 10-line header block of the index file.
                if line_num < 11:
                    line_num += 1
                    continue
                # Collapse runs of spaces into positional fields.
                # NOTE(review): a company name containing spaces will spill
                # into later fields — the fixed-width format may need column
                # slicing instead.
                parts = [item for item in line.split(' ') if item != '']
                company = parts[0].strip()
                type = parts[1].strip()
                cik = parts[2].strip()
                date_filed = parts[3].strip()
                file_name = parts[4].strip()
                if type in ['10-Q', '8-K', '10-K']:
                    if company not in company_dictionary:
                        company_dictionary[company] = []
                    company_dictionary[company].append({'type': type,
                                                        'cik': cik,
                                                        'date_filed': date_filed,
                                                        'file_name': file_name})
        return company_dictionary

    def get_company(self, company_name):
        """Print every filing record for companies whose name starts with company_name."""
        # items()/print() replace the Python-2-only iteritems()/print statement.
        for key, value in self.company_dictionary.items():
            if key.startswith(company_name):
                for file_dict in value:
                    print(file_dict)
class TestFileReader(unittest.TestCase):
    """Exercises FileReader against a small generated CSV fixture."""

    TEST_FILENAME = 'test_file.csv'
    EXPECTED_HEADER = ('header_a', 'header_b')
    EXPECTED_ROWS = (
        ('val_a1', 'val_b1'),
        ('val_a2', 'val_b2'),
        ('val_a3', 'val_b3'),
        ('val_a4', 'val_b4'),
        ('val_a5', 'val_b5'),
    )

    @classmethod
    def setUpClass(cls):
        # Write the header followed by every data row to the fixture file.
        with open(cls.TEST_FILENAME, 'w') as f:
            writer = csv.writer(f)
            writer.writerow(cls.EXPECTED_HEADER)
            writer.writerows(cls.EXPECTED_ROWS)

    def setUp(self):
        # Fresh reader per test; closed again in tearDown.
        self._csv_file = FileReader(self.TEST_FILENAME, has_header=True)

    def tearDown(self):
        self._csv_file.close()

    @classmethod
    def tearDownClass(cls):
        # Remove the fixture if it is still present.
        if os.path.isfile(cls.TEST_FILENAME):
            os.remove(cls.TEST_FILENAME)

    def test_read_first_row(self):
        """get_next_row returns the first data row (header already consumed)."""
        self.assertEqual(tuple(self._csv_file.get_next_row()),
                         self.EXPECTED_ROWS[0])

    def test_header(self):
        """The header attribute exposes the CSV's first line."""
        self.assertEqual(tuple(self._csv_file.header), self.EXPECTED_HEADER)

    def test_iterable(self):
        """Iterating the reader yields the data rows in order."""
        for row, expected in zip(self._csv_file, self.EXPECTED_ROWS):
            self.assertEqual(tuple(row), expected)
def calcCounts(self):
    """Count label and per-feature value frequencies over the training rows.

    Returns (label_count, feature_count) where label_count maps
    label -> occurrences (plus a 'total' entry), and feature_count maps
    label -> feature index (as str) -> value -> count (each feature dict
    also carries its own 'total').
    """
    # Missing labels not yet added.
    fr = FileReader(self._file)
    rows = fr.getRows()
    # The last column of each row is the label; the rest are features.
    self._num_features = len(rows[0]) - 1
    feature_count = {}
    label_count = {}
    label_count['total'] = 0
    for row in rows:
        label_count['total'] += 1
        if label_count.get(row[-1]) is None:
            label_count[row[-1]] = 1
        else:
            label_count[row[-1]] += 1
        # First time this label is seen: set up its per-feature counters.
        if feature_count.get(row[-1]) is None:
            feature_count[row[-1]] = {}
            for i in range(self._num_features):
                feature_count[row[-1]][str(i)] = {}
                feature_count[row[-1]][str(i)]['total'] = 0
        for i in range(self._num_features):
            feature_i = feature_count[row[-1]][str(i)]
            value = feature_i.get(row[i])
            feature_i[
                row[i]] = 1 if value is None else feature_i[row[i]] + 1
            feature_count[row[-1]][str(i)]['total'] += 1
    # Give every label a (possibly zero) entry for every feature value seen
    # under ANY label, so later probability lookups never miss a key.
    for label in feature_count:
        for feature in feature_count[label]:
            feature_values = set()
            for l in filter(lambda x: x != 'total', feature_count):
                for value in feature_count[l][feature]:
                    feature_values.add(value)
            for value in feature_values:
                if feature_count[label][feature].get(value) is None:
                    feature_count[label][feature][value] = 0
    return (label_count, feature_count)
def main():
    """Read room/student files, group students by room, write the chosen format."""
    args = parse_args()
    try:
        rooms = FileReader(args.rooms_path).read()
        students = FileReader(args.students_path).read()
    except FileNotFoundError as e:
        print(e)
        return

    # Each student's "room" field indexes into student_rooms, which mirrors
    # the order of the rooms input.
    student_rooms = [StudentRoom(Room(room["id"], room["name"]))
                     for room in rooms]
    for student in students:
        student_rooms[student["room"]].students.append(
            Student(student["id"], student["name"]))

    writers = {"json": JSONWriter(), "xml": XMLWriter()}
    writers[args.format].write(student_rooms)
def test_run(data_type, lower_and_remove_punctuation, remove_stop_words, distance_method):
    """Train a Rocchio classifier on the train split and score the test split.

    :param data_type: representation to build: 'boolean' / 'tf' / 'tfidf'
    :param lower_and_remove_punctuation: bool, lowercase text and strip punctuation
    :param remove_stop_words: bool, drop stop words before vectorising
    :param distance_method: 'euclidean' / 'cosine'
    :return: accuracy of the classifier on the test set
    """
    file_name = "./dataset/amazon_cells_labelled_full.txt"
    train_file_name = "./dataset/amazon_cells_labelled_train.txt"
    test_file_name = "./dataset/amazon_cells_labelled_test.txt"

    data = FileReader(file_name, lower_and_remove_punctuation, remove_stop_words)
    train_set, _ = data.build_set(data_type, train_file_name)
    test_set, _ = data.build_set(data_type, test_file_name)

    classifier = RocchioClassifier(train_set)
    return calc_accuracy(test_set, classifier, distance_method)
def calcCounts(self):
    """Count label and per-feature value frequencies over the training rows.

    Returns (label_count, feature_count) where label_count maps
    label -> occurrences (plus a 'total' entry), and feature_count maps
    label -> feature index (as str) -> value -> count (each feature dict
    also carries its own 'total').
    """
    # Missing labels not yet added.
    fr = FileReader(self._file)
    rows = fr.getRows()
    # The last column of each row is the label; the rest are features.
    self._num_features = len(rows[0])-1
    feature_count = {}
    label_count = {}
    label_count['total'] = 0
    for row in rows:
        label_count['total'] += 1
        if label_count.get(row[-1]) is None:
            label_count[row[-1]] = 1
        else:
            label_count[row[-1]] += 1
        # First time this label is seen: set up its per-feature counters.
        if feature_count.get(row[-1]) is None:
            feature_count[row[-1]] = {}
            for i in range(self._num_features):
                feature_count[row[-1]][str(i)] = {}
                feature_count[row[-1]][str(i)]['total'] = 0
        for i in range(self._num_features):
            feature_i = feature_count[row[-1]][str(i)]
            value = feature_i.get(row[i])
            feature_i[row[i]] = 1 if value is None else feature_i[row[i]]+1
            feature_count[row[-1]][str(i)]['total'] += 1
    # Give every label a (possibly zero) entry for every feature value seen
    # under ANY label, so later probability lookups never miss a key.
    for label in feature_count:
        for feature in feature_count[label]:
            feature_values = set()
            for l in filter(lambda x: x != 'total', feature_count):
                for value in feature_count[l][feature]:
                    feature_values.add(value)
            for value in feature_values:
                if feature_count[label][feature].get(value) is None:
                    feature_count[label][feature][value] = 0
    return(label_count, feature_count)
def search_paths(self, search_path, kernel_file_name):
    """Search a given search path and its sub paths for a kernel file.

    Tries 'kernels/' first, then 'kernels/<similarity_measure_name>/'.

    :param search_path: Path to search for kernel files.
    :param kernel_file_name: Name of kernel file.
    :returns: Content of kernel file, or '' when not found.
    :rtype: String.
    """
    candidates = (
        os.path.join(search_path, 'kernels'),
        os.path.join(search_path, 'kernels', self.similarity_measure_name),
    )
    for folder in candidates:
        try:
            return FileReader.file_as_string(folder, kernel_file_name)
        except IOError:
            continue
    return ''
def main():
    """Cluster the WePS training web pages for every ambiguous person name."""
    # NOTE: 'traininig' is the literal (misspelled) directory name in the dataset.
    webpages_dir = os.path.join(util.ROOT,
                                'data/weps2007_data_1.1/traininig/web_pages')
    fe = FeatureExtractor()
    ff = FeatureFilter()
    for name in os.listdir(webpages_dir):
        # Python 3 print function (the original used Python 2 print statements,
        # which are a SyntaxError on 3.x).
        print('begin clustering %s' % name)
        reader = FileReader(webpages_dir, name)
        description = reader.read_description()
        pc = PersonCorpus(name)
        fm = FeatureMapper()
        for rank in description:
            doc_meta = {}
            html_path = os.path.join(webpages_dir, name, 'raw', rank,
                                     'index.html')
            content = text_extract(html_path)
            features, wordcount = fe.extract(content)
            doc_meta['word_num'] = wordcount
            good_features = ff.filter(features)
            vec = FeatureVector(good_features, fm)
            pc.add_vector(vec)
        pc.compute_matrix()
        pc.dump_matrix()
def _read(self):
    """Read cluster membership and distance data from the input file.

    Returns (cluster_indexes, distances, count) where cluster_indexes maps a
    cluster id to an int64 numpy array of member record indexes.
    """
    cluster_indexes = dict()
    distances = dict()
    count = 0
    start = time.time()
    # 64 appears to be the reader's chunk/worker setting — TODO confirm.
    reader = FileReader(self._input_file, 64, self._process)
    for ci, ds, c in reader.read():
        distances.update(ds)
        count += c
        # Merge the partial per-cluster index lists into numpy arrays.
        for x in ci:
            if x not in cluster_indexes:
                cluster_indexes[x] = np.array(ci[x], dtype="int64")
            else:
                cluster_indexes[x] = np.append(cluster_indexes[x], ci[x])
    # Sanity check: every counted record must have a distance entry.
    assert count == len(distances), \
        "ids count: {}, record count: {}".format(
            len(distances), count)
    print("Read data done, read {} records, elapsed: {}".format(
        count, time.time() - start))
    return cluster_indexes, distances, count
class FileReaderTest(unittest.TestCase):
    """Validates FileReader line checking and iteration over 'test_data'."""

    def setUp(self):
        self.fr = FileReader('test_data')

    def test__check_line(self):
        """_check_line rejects bad alphabets/lengths and accepts valid RNA."""
        self.assertRaises(WrongData, self.fr._check_line, 'AUAGCx')
        self.assertRaises(WrongData, self.fr._check_line, 'aUGCA')
        self.assertRaises(WrongData, self.fr._check_line, 'A')
        try:
            self.fr._check_line('ACGCGCGCGCGCGAAAUUUU')
        except WrongData:
            # The original used a bare `except:`, which would also swallow
            # unrelated errors (even KeyboardInterrupt); only the expected
            # failure mode should turn into a test failure.
            self.fail()

    def test_get_one_line(self):
        """get_one_line returns the first sequence in the fixture."""
        l = self.fr.get_one_line()
        self.assertEqual(l, 'GCGCGCGC')

    def test_iteration(self):
        """Iterating the reader yields all three fixture sequences."""
        count = 0
        for x in self.fr:
            count += 1
        self.assertEqual(count, 3)
import sys

from solver import Solver
from file_reader import FileReader

if len(sys.argv) != 2:
    print("Please specify a puzzle file to solve.")
    # Exit non-zero: a missing argument is an error, and callers checking the
    # exit status should see the failure (the original exited with 0).
    sys.exit(1)

filename = sys.argv[1]
file_reader = FileReader()
puzzle = file_reader.get_puzzle_from_file(filename)

solver = Solver()
if solver.is_valid(puzzle):
    print('Okay, I can solve this.')
    solution = solver.solve(puzzle)
    # Print the grid one row per line; each number is followed by a space,
    # matching the original per-square output exactly.
    for row in solution:
        print(''.join(str(square.number) + ' ' for square in row))
else:
    print('This puzzle is invalid.')
def main(argv):
    """Train a decision tree on -t/--train, test on -e/--test, bounded by -d depth."""
    setpath()
    try:
        opts, args = getopt.getopt(argv, "ht:e:d:",
                                   ["train=", "test=", "maxDepth="])
        if len(sys.argv) < 7:
            raise getopt.GetoptError(None)
    except getopt.GetoptError:
        # Python 3 print function (the original used Python 2 print statements).
        print('\nusage: run.py -t <trainfile> -e <testfile> -d <maxDepth>\n')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile> -d <maxDepth>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg
        elif opt in ("-d", "--maxDepth"):
            maxDepth = int(arg)

    from file_reader import FileReader
    fr = FileReader(trainfile)
    from decision_tree_builder import DecisionTreeBuilder
    # getRows() returns a dataMatrix
    dtb = DecisionTreeBuilder(fr.getRows())
    print('Features: {}'.format(fr.featureNames))
    root = dtb.build(maxDepth)
    print('Tree Building Complete and Successful')
    print('Height of the tree is {}'.format(dtb.decisionTree.height()))

    # Testing section: zero-initialized confusion matrix (distinct loop
    # variables — the original reused `j` for both dimensions).
    num_labels = len(fr.getClassLabels())
    confusion_matrix = [[0 for _ in range(num_labels)]
                        for _ in range(num_labels)]
    # Read the test file. (The unused No/Yes/No_Error/Yes_error counters from
    # the original were removed — they were never updated or printed.)
    testFile_Reader = FileReader(testfile)
    dataMatrix_testFile = testFile_Reader.getRows()
    Error_Count = 0
    Total_records = len(dataMatrix_testFile) + 0.0

    # Testing phase
    for row in dataMatrix_testFile:
        predicted_classLabel = dtb.predict(row)
        print('\tActual Label is {}, and Predicted Label is {}'.format(
            row[len(row) - 1], predicted_classLabel))
        # To populate the confusion matrix for the zoo data set, uncomment:
        # confusion_matrix[int(row[len(row)-1])-1][int(predicted_classLabel)-1] += 1
        if not row[len(row) - 1] == predicted_classLabel:
            Error_Count += 1.0

    print('\n\n------------------Confusion Matrix----------')
    for row in confusion_matrix:
        print(row)
    print('\n\n--------------Error Count----------------')
    print(Error_Count)
    print('\n\n--------------Accuracy----------------')
    print((Total_records - Error_Count) / Total_records)
def setUp(self):
    """Open a fresh FileReader on the fixture CSV before each test."""
    self._csv_file = FileReader(self.TEST_FILENAME, has_header=True)
def setUp(self):
    """Create the FileReader under test against the 'test_data' fixture."""
    self.fr = FileReader('test_data')
def __init__(self, year=YEAR, quarter=QUARTER):
    """Build the company index for the given EDGAR year/quarter.

    The index file is assumed to already exist locally; the download step
    is left commented out.
    """
    self.year = year
    self.quarter = quarter
    self.fr = FileReader()
    # self.download_index_file()
    self.company_dictionary = self.build_dictionary()