def test_fetch_text_file_with_correct_data(self):
        """Fetching a known two-row text file yields the expected dict of records."""
        # Arrange: expected parse result, keyed by record id.
        file_name = "testdata\\test_data_2_rows.txt"
        expected_result = {
            'A001': {
                'gender': 'F', 'age': '21', 'sales': '001', 'bmi': 'Normal',
                'salary': '12', 'birthday': '01/01/1996', 'valid': '1',
            },
            'Q001': {
                'gender': 'M', 'age': '45', 'sales': '999', 'bmi': 'Underweight',
                'salary': '725', 'birthday': '31/12/1971', 'valid': '1',
            },
        }
        file = FileReader.fetch_text_file(file_name)

        # Act: parse with an empty prefix and a comma delimiter.
        result = FileReader.fetch_text_contents(file, "", ",")

        # Assert
        self.assertTrue(result == expected_result)
    def test_write_to_database_with_display_data(self):
        """Writing a mixed valid/invalid data set should echo the first valid row.

        Captures stdout while write_to_database runs, answers its input()
        prompt with "Y", and checks the rendered 'A001' row appears in the
        captured output.
        """
        # Arrange
        test_name = "Write to Database with displaying data Test #02"
        insert = "Y"  # see data saved to database
        # NOTE(review): side_effect iterates the string, so "Y" supplies
        # exactly one input() answer.
        expected_result = "['A001', 'F,', '21,', '001,', 'Normal,', '12,', '01/01/1996,', '1']"
        data_to_test = {'A001': {'gender': 'F', 'age': '21', 'sales': '001', 'bmi': 'Normal', 'salary': '12',
                                 'birthday': '01/01/1996', 'valid': '1'},
                        'Q001': {'gender': 'M', 'age': '45', 'sales': '999', 'bmi': 'Underweight', 'salary': '725',
                                 'birthday': '31/12/1971', 'valid': '1'},
                        'A002': {'gender': 'F', 'age': '21', 'sales': '001', 'bmi': 'Normal', 'salary': '12',
                                 'birthday': '01/01/1996', 'valid': '1'},
                        'A05': {'gender': 'F', 'age': '21', 'sales': '001', 'bmi': 'Normal', 'salary': '12',
                                'birthday': '01/01/1996', 'valid': '0'}}
        class_to_test = FileReader()

        # Act: redirect stdout into a StringIO so the method's prints can be
        # inspected afterwards, then restore the real stdout.
        cmd_output = io.StringIO()
        sys.stdout = cmd_output
        with patch('builtins.input', side_effect=insert):
            result = class_to_test.write_to_database(data_to_test)
        sys.stdout = sys.__stdout__

        # Assert: report pass/fail by printing instead of letting the
        # AssertionError propagate (house style of this suite).
        try:
            self.assertTrue(expected_result in cmd_output.getvalue())
        except AssertionError:
            print("{} Failed - Should be {}, but was {}.".format(
                test_name,
                expected_result,
                result))
        else:
            print("{} Passed".format(test_name))
Beispiel #3
0
 def _default(self):
     """Run FileReader.call_file with the user's separator, defaulting to ','."""
     separator = self.user_string if self.user_string else ","
     reader = FileReader()
     FileReader.call_file(reader, self.detail_mode, separator)
Beispiel #4
0
def main(argv):
  """Parse -t/-e options, train logistic regression, and evaluate it.

  Fix: the original used Python 2 print statements, which are syntax
  errors under Python 3; it also imported FileReader twice.

  :param argv: command-line arguments (excluding the program name).
  """
  setpath()
  try:
    opts, args = getopt.getopt(argv, "ht:e:", ["train=", "test="])
    # Both -t and -e are required (program name + 4 tokens).
    if len(sys.argv) < 5:
      raise getopt.GetoptError(None)

  except getopt.GetoptError:
    print('\nusage: run.py -t <trainfile> -e <testfile> \n')
    sys.exit(2)
  for opt, arg in opts:
    if opt == '-h':
      print('run.py -t <trainfile> -e <testfile>')
      sys.exit()
    elif opt in ("-t", "--train"):
      trainfile = arg
    elif opt in ("-e", "--test"):
      testfile = arg

  # Read the training file and build the training matrix.
  from file_reader import FileReader
  fr = FileReader(trainfile)
  training_Set = fr.getRows()

  # Read the test file, build the testing matrix, and evaluate.
  test_File_Reader = FileReader(testfile)
  testing_Set = test_File_Reader.getRows()
  test_Result(logistic_Regression(training_Set), testing_Set)
Beispiel #5
0
def main(argv):
    """Parse -t/-e options, train NaiveBayes on the training file, and report
    accuracy plus a confusion matrix over the test file.

    Fix: removed a dead `fr = FileReader(testfile)` that read the test file
    a second time and was never used.

    :param argv: command-line arguments (excluding the program name).
    """
    setpath()
    try:
        opts, args = getopt.getopt(argv, "ht:e:", ["train=", "test="])
        # Both -t and -e are required (program name + 4 tokens).
        if (len(sys.argv) < 5):
            raise getopt.GetoptError(None)

    except getopt.GetoptError:
        print('\nusage: run.py -t <trainfile> -e <testfile>\n')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    from file_reader import FileReader
    from naive_bayes import NaiveBayes
    nb = NaiveBayes(trainfile)

    test_file_reader = FileReader(testfile)
    testData = test_file_reader.getRows()

    # Error and confusion-matrix counters.
    num_errors = 0
    true_positive = 0
    false_positive = 0
    true_negative = 0
    false_negative = 0

    # Testing phase: the label is the last column; '1' is the positive class.
    for idx, row in enumerate(testData):
        prediction = nb.binary_classify(row)
        if row[-1] != prediction:
            num_errors += 1.0
            print("Error on row: %s" % str(idx + 1))
            if row[-1] == '1':
                false_negative += 1
            else:
                false_positive += 1
        elif row[-1] == '0':
            true_negative += 1
        else:
            true_positive += 1

    print('\n\n--------------Error Count----------------')
    print(num_errors)
    print('\n\n--------------Accuracy----------------')

    print("\n\nThe Accuracy is " +
          str((len(testData) - num_errors) * 100 / len(testData)) + "%")
    print("\n===========The confusion matrix===========")
    print("\t No \t Yes")
    print("No \t", str(true_negative) + "\t", str(false_positive))
    print("Yes \t", str(false_negative) + "\t", str(true_positive))
Beispiel #6
0
def main(argv):
  """Parse -t/-e options, train NaiveBayes, and report test accuracy plus a
  confusion matrix.

  :param argv: command-line arguments (excluding the program name).
  """
  setpath()
  try:
    opts, args = getopt.getopt(argv,"ht:e:",["train=","test="])
    # Both -t and -e are required (program name + 4 tokens).
    if(len(sys.argv) < 5):
      raise getopt.GetoptError(None)

  except getopt.GetoptError:
    print('\nusage: run.py -t <trainfile> -e <testfile>\n')
    sys.exit(2)
  for opt, arg in opts:
    if opt == '-h':
      print('run.py -t <trainfile> -e <testfile>')
      sys.exit()
    elif opt in ("-t", "--train"):
       trainfile = arg
    elif opt in ("-e", "--test"):
       testfile = arg

  from file_reader import FileReader
  # NOTE(review): `fr` is never used below — the test file is read again via
  # test_file_reader. Looks like dead code; confirm before removing.
  fr = FileReader(testfile)
  from naive_bayes import NaiveBayes
  nb = NaiveBayes(trainfile)

  test_file_reader = FileReader(testfile)
  testData = test_file_reader.getRows()

  # Error and confusion-matrix counters.
  num_errors = 0
  true_positive = 0
  false_positive = 0
  true_negative = 0
  false_negative = 0

  #Testing phase: the label is the last column; '1' is the positive class.
  for idx, row in enumerate(testData):
    prediction = nb.binary_classify(row)
    if row[-1] != prediction:
      num_errors += 1.0
      print("Error on row: %s" % str(idx+1))
      if row[-1] == '1':
        false_negative += 1
      else:
        false_positive += 1
    elif row[-1] == '0':
      true_negative += 1
    else:
      true_positive += 1

  print('\n\n--------------Error Count----------------')
  print(num_errors)
  print('\n\n--------------Accuracy----------------')

  print("\n\nThe Accuracy is " +str((len(testData) - num_errors)*100/len(testData)) + "%")
  print("\n===========The confusion matrix===========")
  print("\t No \t Yes")
  print("No \t", str(true_negative) + "\t", str(false_positive))
  print("Yes \t", str(false_negative) +"\t", str(true_positive))
Beispiel #7
0
def main():
    """Read rooms and students, distribute the students, and print the JSON export."""
    parsing = ArgumentParser()
    rooms_file, students_file = parsing.args_info()
    reader = FileReader()
    rooms = reader.file_reader(rooms_file)
    students = reader.file_reader(students_file)
    distributed = Distribution(rooms, students).student_distribution()
    print(JsonExporter(distributed).unloading())
Beispiel #8
0
def main():
    """Load the simulated system from file, then start the Qt GUI event loop."""
    simulated_system = System(0.02)
    FileReader(simulated_system).read_file()

    # The QApplication must outlive main(); keep it in a module-level global.
    global app
    app = QApplication(sys.argv)
    gui = GUI(simulated_system)
    sys.exit(app.exec_())
Beispiel #9
0
class PackingSystem:
    """Splits a source file into numbered SWPacket DATA packets for sending."""

    # Default payload bytes per packet, and the fixed header size in bytes.
    DEFAULT_PACKET_DATA_SIZE = 4096
    PACKET_HEADER_SIZE = 4

    def __init__(self):
        """Initialise packet sizing from the class defaults and start numbering at 1."""
        self.__data_size_in_bytes = PackingSystem.DEFAULT_PACKET_DATA_SIZE
        self.__packet_size_in_bytes = PackingSystem.DEFAULT_PACKET_DATA_SIZE + PackingSystem.PACKET_HEADER_SIZE

        self.__packet_number = 1

        self.__header_size_in_bytes = PackingSystem.PACKET_HEADER_SIZE

    def pack_data(self):
        """Build the next DATA packet from the open file and advance the counter.

        Requires open_file() to have been called first — that is what creates
        self.__file_reader.
        """
        new_packet = SWPacket(self.__packet_size_in_bytes,
                              self.__data_size_in_bytes,
                              self.__header_size_in_bytes,
                              packet_type=PacketType.DATA)
        new_packet.set_packet_number(self.__packet_number)
        self.__packet_number += 1
        new_packet.store_data(self.__file_reader.read())
        return new_packet

    def open_file(self, source_file):
        """Open *source_file* through a FileReader sized to the payload."""
        self.__file_reader = FileReader(source_file, self.__data_size_in_bytes)
        self.__file_reader.open()

    def close_file(self):
        """Close the underlying file reader."""
        self.__file_reader.close()

    def get_file_size(self):
        """Return the open file's size in bytes."""
        return self.__file_reader.get_file_size_in_bytes()

    def get_current_packet_number(self):
        """Return the number the NEXT packed packet will carry."""
        return self.__packet_number

    def get_data_size_in_bytes(self):
        """Return the payload size used for each packet."""
        return self.__data_size_in_bytes

    def get_end_file_packet(self):
        """Build the end-of-file marker packet, numbered but not advancing the counter."""
        end_packet = SWPacket(self.__packet_size_in_bytes,
                              self.__data_size_in_bytes,
                              self.__header_size_in_bytes,
                              packet_type=PacketType.DATA)

        end_packet.make_end_packet()
        end_packet.set_packet_number(self.__packet_number)

        return end_packet

    def reset(self):
        """Restart packet numbering from 1."""
        self.__packet_number = 1

    def set_data_size(self, new_size):
        """Change the payload size; the total packet size tracks it plus the header."""
        self.__data_size_in_bytes = new_size
        self.__packet_size_in_bytes = new_size + PackingSystem.PACKET_HEADER_SIZE
    def test_fetch_text_file_not_found_returns_nothing(self):
        """A missing file should propagate None through the content parser."""
        # Arrange
        missing_file = "testdata\\file_does_not_exist.txt"
        expected = None
        file = FileReader.fetch_text_file(missing_file)

        # Act
        result = FileReader.fetch_text_contents(file, "", ",")

        # Assert: nothing can be parsed from a file that was never opened.
        self.assertTrue(result == expected)
Beispiel #11
0
def retrieve_best_k_related_reviews(K, query):
    """
    Utilizing the Rocchio Classifier,
    finds and displays the k most relevant reviews to a given query, and their score.
    :param K: int, number of related documents desired
    :param query: string, the query to check the reviews against.
    :return:
    """
    dataset_path = "./dataset/amazon_cells_labelled_full.txt"
    reader = FileReader(dataset_path, True, True)
    tfidf_set, text_set = reader.build_set('tfidf', dataset_path)
    parsed_query = reader.parse_query(query)
    retrieve(K, parsed_query, tfidf_set, text_set)
Beispiel #12
0
    def append_root(self, root):
        ''' Add a root directory and index every file found under it. '''

        data = self.read()

        # If the directory already exists, or is a sub-path of an existing
        # root, bail out; otherwise register the new root.
        for _root in data['Root']:
            if _root == root or _root in root:
                return
        data['Root'].append(root)

        # Initialise a progress bar for the scan.
        progress_bar = ProgressBar('Code Files')

        # Index all files under the directory.
        file_reader = FileReader()
        file_list = file_reader.walk_folder(root)
        total_number = len(file_list)
        for index, file in enumerate(file_list):

            # Collect the file's basic information:
            ## its identification code,
            file_code = file_reader.code_file(file)
            ## its base name and extension,
            file_basename = os.path.basename(file)
            file_name, file_extension = os.path.splitext(file_basename)
            ## and a human-readable size (the byte count is the last
            ## '-'-separated field of the code).
            file_size_num = (int)(file_code.split('-')[-1])
            file_size_num = round(file_size_num / 1024 / 1024)
            file_size = str(file_size_num) + ' MB' if file_size_num < 1024 \
                else str(round(file_size_num / 1024, 2)) + ' GB'

            # Add the file if it is not already in the stored data.
            if file not in data['File']:
                data['File'][file] = {
                    'code': file_code,
                    'extension': file_extension,
                    'name': file_name,
                    'path': file,
                    'size': file_size,
                }

            # Refresh the progress bar immediately.
            progress_bar.set_value(index + 1, total_number)
            QApplication.processEvents()

        # Scan finished: close the progress bar.
        progress_bar.close()
        # Write the updated data back.
        self.write(data)
Beispiel #13
0
 def test_05(self):
     """FileReader should discover at least one class in the plant_uml file.

     Fix: the original body ended in `if len(...) > 0: pass`, which asserts
     nothing — the test could never fail. Make the expectation explicit.
     """
     x = FileReader()
     x.add_file("plant_uml")
     x.read_file()
     x.find_classes()
     assert len(x.all_my_classes) > 0
Beispiel #14
0
 def test_five_hyphens_at_start_ends_read(self, mocked_open):
     """
     Check that a file is read and the data added to the list_lines
     attribute, stopping reading when we see a line that starts with '-----'
     """
     # mock_open supplies the fake file content. CR, CRLF and LF endings are
     # all present; only the two lines before the '-----' terminator should
     # survive into list_lines.
     mocked_open.side_effect = [
         mock.mock_open(
             read_data="CR\rCRLF\r\n-----LF\nNo end of file").return_value
     ]
     __file_path = "Path to File"
     test_object = FileReader()
     test_object.read_file(__file_path)
     # The reader must open exactly this path, in text-read mode.
     mocked_open.assert_called_with(__file_path, "r")
     self.assertEqual(2, len(test_object.list_lines),
                      "Should be 2 items in the list")
 def __init__(self, filename):
     """Load the YAML file at *filename* and extract instruction/video data.

     Fix: the original called ``yaml.safe_load(open(filename, 'r'))``,
     leaking the file handle; a context manager closes it deterministically.

     :param filename: path to a YAML document understood by FileReader.
     """
     with open(filename, 'r') as yaml_file:
         self._text = yaml.safe_load(yaml_file)
     data = FileReader(self._text)
     data.read()
     self._original_instructions = data.get_original_instructions()
     self._special_instructions = data.get_special_instructions()
     self._video_list = data.get_video_list()
     self._names = data.get_names()
Beispiel #16
0
 def test_hash_at_start_skips_line(self, mocked_open):
     """
     Verify read_file drops every line beginning with '#', so only the two
     non-comment lines end up in list_lines.
     """
     fake_file = mock.mock_open(
         read_data="#CR\rCRLF\r\n# LF\nNo end of file").return_value
     mocked_open.side_effect = [fake_file]
     __file_path = "Path to File"
     reader_under_test = FileReader()
     reader_under_test.read_file(__file_path)
     mocked_open.assert_called_with(__file_path, "r")
     self.assertEqual(2, len(reader_under_test.list_lines),
                      "Should be 2 items in the list")
def validate(file):
    """Run ReportErrors over *file*; return the file, its dict, errors,
    delimiter and header fixes."""
    fdict = FileReader(file).fdict
    delimiter = fdict['delimiter']
    checker = ReportErrors(fdict, file, h1000lk)
    errors = checker.run()
    headers_fix = checker.set_to_fix()
    return file, fdict, errors, delimiter, headers_fix
class Controller:
    """Loads .txt / .csv program files through a FileReader and prints them."""

    # NOTE(review): class-level attribute — this single FileReader instance
    # is shared by every Controller; confirm that is intentional.
    file = FileReader()

    def load_file(self, infile, temp_file=''):
        """Dispatch on the file extension, parse the file, and print it.

        :param infile: path ending in .txt or .csv.
        :param temp_file: intermediate file used only for the .csv path.
        """
        try:
            if ".txt" in infile[-4:]:
                self.file.add_file(infile)
                self.file.read_txt_file()
                self.file.find_classes()
                self.file.printProgram()
            elif ".csv" in infile[-4:]:
                # CSV is first converted via temp_file, then read as txt.
                self.file.read_csv_file(infile, temp_file)
                self.file.read_txt_file()
                self.file.find_classes()
                self.file.printProgram()
            else:
                message = "incorrect file format, please see help load"
                raise NameError(message)

        except NameError as e:
            print(e)
        except FileNotFoundError:
            print("File not found")
        except Exception as e:
            # Broad catch: any other reader failure is reported, not raised.
            print(e)
 def __init__(self, file_name):
     """Parse the Day2 input file into a list of Command objects."""
     self.commands = []
     for line in FileReader.to_list("../Day2/" + file_name):
         # Each line is "<direction> <amount>".
         parts = line.split(" ")
         self.commands.append(Command(parts[0], int(parts[1])))
Beispiel #20
0
def metadata_main() -> None:
    """
    Main function for handling metadata.

    Loads the JSON file named by ``args.filename``, transforms it as
    Cantabular ('c') or ONS ('o') metadata, and pushes the result to the
    Nomis metadata API.

    :raises ValueError: if ``args.metadata_format`` is neither 'c' nor 'o'.
    """
    # NOTE(review): depends on module-level `args`, `config` and `logger`
    # being initialised before this is called — confirm at the call site.
    logger.info(f"Commencing metadata transformation service.")

    with FileReader(args.filename) as fr:
        file_data = fr.load_json()
    if args.metadata_format.lower() == 'c':
        uuids_metadata = cantabular_metadata(file_data)
    elif args.metadata_format.lower() == 'o':
        uuids_metadata = ons_metadata(file_data)
    else:
        raise ValueError("Unrecognised metadata format.")

    if len(uuids_metadata) > 0:
        # Build the per-variable requests and submit them in one connector
        # session, collecting the UUIDs created.
        variable_metadata_requests = DatasetTransformations.variable_metadata_request(
            uuids_metadata)
        with NomisMetadataApiConnector(
                config.get_credentials('nomis_metadata'),
                config.get_client('nomis_metadata')) as metadata_connector:
            uuids = metadata_connector.add_new_metadata(
                variable_metadata_requests, return_uuids=True)
        logger.info(
            f"METADATA TRANSFORMATION SUCCESS. "
            f"Metadata was created for entities with the following UUIDS: {uuids}"
        )

    else:
        logger.info("No metadata appended.")
Beispiel #21
0
 def save_data(self):
     """Append every form field to data_string (one per line, with the
     original blank-line spacing) and write the save file."""
     field_lines = [
         self.center_name.text(),
         self.center_location.text(),
         self.center_velocity.text(),
         self.center_acceleration.text(),
         self.center_mass.text(),
         self.center_density.text() + '\n' + '\n',
         self.collision.text(),
         self.frame_rate.text(),
         self.time_step.text(),
         self.simulation_speed.text(),
         self.time_span.text() + '\n',
         '----------------------------------',
     ]
     # Single join produces exactly the same text the original built with
     # repeated '+=' (each entry newline-terminated).
     self.data_string += '\n'.join(field_lines) + '\n'

     path = 'files/save.txt'
     FileReader.write_file(self, path)
Beispiel #22
0
    def validate(self, file, id):
        """Validate each record of *file*; return bad+good rows ('file') or
        only the good rows ('db')."""
        self.invalid_data = []
        self.clean_data_sets = []

        for record in FileReader().read_file(file):
            self.data_validate(record)

        # No invalid rows: both callers receive the clean sets.
        if not self.invalid_data:
            return self.clean_data_sets
        if id == "file":
            return self.invalid_data + self.clean_data_sets
        if id == "db":
            return self.clean_data_sets
Beispiel #23
0
def main():
    """Parse CLI arguments, confirm them interactively, and run the display.

    Fix: the original called ``args.x.__str__()`` directly; use ``str()``
    (argparse positional values are already strings).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("path")
    parser.add_argument("dialog")
    parser.add_argument("log")
    args = parser.parse_args()
    path_arg = str(args.path)
    dialog_arg = str(args.dialog)
    log_arg = str(args.log)

    args_handler = ArgsHandler()

    # Re-prompt the user until each argument is acceptable.
    path_dialog = args_handler.get_path_from_args(
        path_arg, "Veuillez saisir le bon chemin du fichier : \n")
    dialog = args_handler.check_input_response(
        dialog_arg, "Souhaitez-vous un dialogue (oui/non) ? \n")
    log = args_handler.check_input_response(
        log_arg, 'Souhaitez-vous enregistrer les informations (oui/non) ? \n')

    file_reader = FileReader(path_dialog)
    calcul = Calcul(file_reader)
    display = Display()

    if dialog == 'non':
        display.no_dialog(calcul, log, file_reader)

    elif dialog == 'oui':
        display.yes_dialog(calcul, log, file_reader)
Beispiel #24
0
def calculate(file_path, file_out_path):
    """Parse *file_path* into an Excel document under *file_out_path*;
    return the generated document's base name."""
    generated_name = uuid.uuid4().hex
    with open(file_path) as source:
        lines = source.readlines()
    calculating_data = FileReader(lines, os.path.join(BASE_DIR, 'columns.json'))
    exporter = ExcelParser(calculating_data, file_out_path)
    exporter.export_document(generated_name, calculating_data.columns)
    return generated_name
Beispiel #25
0
 def test_file_is_open_and_read(self, mocked_open):
     """
     Check that a file is read and the data added to the list_lines
     attribute
     """
     # mock_open supplies four lines using CR, CRLF and LF endings plus an
     # unterminated final line — all four must end up in list_lines.
     mocked_open.side_effect = [
         mock.mock_open(read_data="Category\tName\tRANKED\tG1\tG2\r"
                        "CRLF\r\n"
                        "LF\n"
                        "No end of file").return_value
     ]
     __file_path = "Path to File"
     test_object = FileReader()
     test_object.read_file(__file_path)
     # The reader must open exactly this path, in text-read mode.
     mocked_open.assert_called_with(__file_path, "r")
     self.assertEqual(4, len(test_object.list_lines),
                      "Should be 4 items in the list")
Beispiel #26
0
def unit_test_five(my_file):
    """Test 5: FileReader should add at least one class from *my_file*."""
    print("Run Test 5 - File Reader adds class")
    reader = FileReader()
    reader.add_file(my_file)
    reader.read_file()
    reader.find_classes()
    # This harness reports via prints rather than assertions.
    if len(reader.all_my_classes) > 0:
        print("Class added from file")
    else:
        print("ERROR - class not added")
def main():
    """Interactive CLI: pick an instance type and instance, then evaluate
    the solution methods against the known optimum.

    Fixes: use enumerate instead of calling ``file_names.index`` on every
    iteration (O(n^2) and wrong when names repeat), and resolve the chosen
    file name once instead of re-indexing it three times.
    """
    print("Selecione o tipo de instância:")
    print("1 - Large Scale")
    print("2 - Low Dimensional")
    instance_type_option = input()

    while (instance_type_option not in VALID_INSTANCE_OPTIONS):
        print("Opção inválida, digite outra")
        instance_type_option = input()

    file_names = FileReader.get_file_names(
        path=INSTANCE_OPTIONS_FOLDER_NAMES.get(instance_type_option))

    for index, file_name in enumerate(file_names):
        print(f"{index} - {file_name}")

    print("Selecione uma instância:")

    instance_option = input()
    while not validate_instance_option(instance_option, file_names):
        print("Opção inválida, digite outra")
        instance_option = input()

    chosen_file = file_names[int(instance_option)]
    instance_reader = FileReader(
        path=INSTANCE_OPTIONS_FOLDER_NAMES.get(instance_type_option),
        file_name=chosen_file)
    solution_reader = FileReader(
        path=INSTANCE_OPTIONS_SOLUTION_FOLDER_NAMES.get(instance_type_option),
        file_name=chosen_file)

    optimum_value = solution_reader.parse_solution_data()
    instance_dict = instance_reader.parse_instance_data()
    evaluate_methods(optimum_value, instance_dict, chosen_file)
Beispiel #28
0
def learn():
    """Cluster training products with SVD + K-Means, then predict for each
    product in the test set and print the tester's verdict.

    Fixes: ``i % (len(test_set) // 100)`` raised ZeroDivisionError for test
    sets smaller than 100 items, and the progress figure printed a fraction
    while claiming to be a percentage.
    """
    # splits()
    print("Done Splitting...")

    tester = Tester()
    test_set = tester.getTestSet()

    print("Finished tester Stuff")

    answers = []

    reader = FileReader("training.txt")
    X = reader.read_file()

    print("Starting SVD..")

    # Reduce to 10 latent components before clustering.
    svd = TruncatedSVD(n_components=10, n_iter=10, random_state=42)
    dense = svd.fit_transform(X)

    print("Done with SVD, starting K Means...")

    km = KMeans(n_clusters=100)
    ans = km.fit_predict(dense)

    print("Done with K Means...")

    # Invert the assignment: cluster id -> list of product keys.
    inverseAns = {cluster: [] for cluster in range(100)}
    # centroids = svd.inverse_transform(km.cluster_centers_)
    for trainingProdKey, trainingProdIndex in reader.product.items():
        inverseAns[ans[trainingProdIndex]].append(trainingProdKey)

    print('Done inverting clusters')

    # Report roughly every 1% of progress; max(1, ...) guards small test sets.
    progress_step = max(1, len(test_set) // 100)
    for i, prod in enumerate(test_set):
        answers.append(predict(prod, reader.product, ans, inverseAns))

        if i % progress_step == 0:
            print("\rDone with {}% of predicting...".format(
                100 * i / len(test_set)), end='')

    print()
    print(tester.checkAnswers(answers))
    def test_convert_file_lines_to_puzzle_with_blank_squares(self):
        """'_' cells in the input lines must become Squares with number=None.

        The expected fixture mirrors the 4x4 grid; each Square carries
        (number, row, column, block), as checked attribute-by-attribute below.
        """
        lines_of_file = [
                            '_ 2 3 _\n',
                            '2 _ 4 1\n',
                            '3 4 _ 2\n',
                            '4 1 _ _\n'
                        ]
        expected = [
                    [
                        Square(None,1,1,1),
                        Square(2,1,2,1),
                        Square(3,1,3,2),
                        Square(None,1,4,2)
                    ],
                    [
                        Square(2,2,1,1),
                        Square(None,2,2,1),
                        Square(4,2,3,2),
                        Square(1,2,4,2)
                    ],
                    [
                        Square(3,3,1,3),
                        Square(4,3,2,3),
                        Square(None,3,3,4),
                        Square(2,3,4,4)
                    ],
                    [
                        Square(4,4,1,3),
                        Square(1,4,2,3),
                        Square(None,4,3,4),
                        Square(None,4,4,4)
                    ]
                ]

        # The method under test is private; reach it through name mangling.
        file_reader = FileReader() 

        actual = file_reader._FileReader__convert_file_lines_to_puzzle(lines_of_file)

        # Compare every attribute of every square in the 4x4 grid.
        for i in range(4):
            for j in range(4):
                self.assertEqual(expected[i][j].number, actual[i][j].number)
                self.assertEqual(expected[i][j].row, actual[i][j].row)
                self.assertEqual(expected[i][j].column, actual[i][j].column)
                self.assertEqual(expected[i][j].block, actual[i][j].block)
Beispiel #30
0
class IndexReader(object):
    """Reads the EDGAR quarterly company index into a company -> filings map.

    Fixes: the original mixed tab and space indentation (a TabError on
    Python 3), used Python 2-only ``iteritems``/print statements, opened
    the index in binary mode but split with str (a TypeError on Python 3),
    and never closed the file.
    """

    def __init__(self, year=YEAR, quarter=QUARTER):
        """
        :param year: index year (defaults to module-level YEAR).
        :param quarter: index quarter (defaults to module-level QUARTER).
        """
        self.year = year
        self.quarter = quarter
        self.fr = FileReader()
        # self.download_index_file()
        self.company_dictionary = self.build_dictionary()

    def download_index_file(self):
        """Download this year/quarter's company.idx into files/index/."""
        index_name = 'edgar/full-index/{0}/{1}/company.idx'.format(self.year, self.quarter)
        local_file = 'files/index/'
        self.fr.download(index_name, local_file)

    def build_dictionary(self):
        """Parse files/index/company.idx, keeping only 10-Q, 8-K and 10-K filings.

        :returns: dict mapping company name to a list of
            {'type', 'cik', 'date_filed', 'file_name'} dicts.
        """
        company_dictionary = {}
        with open('files/index/company.idx', 'r') as f:
            for line_num, line in enumerate(f, start=1):
                # The first 10 lines of the index are headers/separators.
                if line_num < 11:
                    continue
                # Columns are separated by runs of two-or-more spaces.
                fields = [item for item in line.split('  ') if item != '']
                company = fields[0].strip()
                filing_type = fields[1].strip()
                cik = fields[2].strip()
                date_filed = fields[3].strip()
                file_name = fields[4].strip()
                if filing_type in ['10-Q', '8-K', '10-K']:
                    if company not in company_dictionary:
                        company_dictionary[company] = []
                    company_dictionary[company].append(
                        {'type': filing_type, 'cik': cik,
                         'date_filed': date_filed, 'file_name': file_name})
        return company_dictionary

    def get_company(self, company_name):
        """Print every filing dict for companies whose name starts with *company_name*."""
        for key, value in self.company_dictionary.items():
            if key.startswith(company_name):
                for file_dict in value:
                    print(file_dict)
class TestFileReader(unittest.TestCase):
    """Round-trip tests for FileReader against a temporary CSV fixture file."""

    # Fixture file name plus the header and rows written by setUpClass.
    TEST_FILENAME = 'test_file.csv'
    EXPECTED_HEADER = ('header_a', 'header_b')
    EXPECTED_ROWS = (
        ('val_a1', 'val_b1'),
        ('val_a2', 'val_b2'),
        ('val_a3', 'val_b3'),
        ('val_a4', 'val_b4'),
        ('val_a5', 'val_b5'),
    )

    @classmethod
    def setUpClass(cls):
        """Write the fixture CSV (header first, then the data rows) once."""
        with open(cls.TEST_FILENAME, 'w') as f:
            csv_writer = csv.writer(f)
            rows = (cls.EXPECTED_HEADER, ) + cls.EXPECTED_ROWS
            for r in rows:
                csv_writer.writerow(r)

    def setUp(self):
        """Open a fresh reader on the fixture before every test."""
        self._csv_file = FileReader(self.TEST_FILENAME, has_header=True)

    def tearDown(self):
        """Close the reader opened in setUp."""
        self._csv_file.close()

    @classmethod
    def tearDownClass(cls):
        """Remove the fixture CSV after all tests have run."""
        if os.path.isfile(cls.TEST_FILENAME):
            os.remove(cls.TEST_FILENAME)

    def test_read_first_row(self):
        """get_next_row must return the first data row, not the header."""
        row = self._csv_file.get_next_row()
        self.assertEqual(tuple(row), self.EXPECTED_ROWS[0])

    def test_header(self):
        """The header attribute must expose the CSV's first line."""
        self.assertEqual(
            tuple(self._csv_file.header),
            self.EXPECTED_HEADER
        )

    def test_iterable(self):
        """Iterating the reader must yield the data rows in file order."""
        for rows, expected in zip(self._csv_file, self.EXPECTED_ROWS):
            self.assertEqual(tuple(rows), expected)
    def calcCounts(self):
        """Count label and per-feature value frequencies over the training rows.

        :returns: tuple (label_count, feature_count) where label_count maps
            each label (plus 'total') to its row count, and feature_count maps
            label -> feature index (as str) -> value -> count (plus a 'total'
            per feature).
        """
        # Missing labels not yet added.

        fr = FileReader(self._file)
        rows = fr.getRows()
        # The last column of each row is the label; the rest are features.
        self._num_features = len(rows[0]) - 1

        feature_count = {}
        label_count = {}
        label_count['total'] = 0

        for row in rows:
            label_count['total'] += 1
            if label_count.get(row[-1]) is None:
                label_count[row[-1]] = 1
            else:
                label_count[row[-1]] += 1

            # First time we see this label: initialise its per-feature tables.
            if feature_count.get(row[-1]) is None:
                feature_count[row[-1]] = {}
                for i in range(self._num_features):
                    feature_count[row[-1]][str(i)] = {}
                    feature_count[row[-1]][str(i)]['total'] = 0

            # Tally this row's value for every feature under its label.
            for i in range(self._num_features):
                feature_i = feature_count[row[-1]][str(i)]
                value = feature_i.get(row[i])
                feature_i[
                    row[i]] = 1 if value is None else feature_i[row[i]] + 1
                feature_count[row[-1]][str(i)]['total'] += 1

        # Ensure every label's table has an entry (possibly 0) for every
        # feature value observed under ANY label, so later probability
        # lookups never hit a missing key.
        for label in feature_count:
            for feature in feature_count[label]:
                feature_values = set()
                for l in filter(lambda x: x != 'total', feature_count):
                    for value in feature_count[l][feature]:
                        feature_values.add(value)

                for value in feature_values:
                    if feature_count[label][feature].get(value) is None:
                        feature_count[label][feature][value] = 0

        return (label_count, feature_count)
Beispiel #33
0
def main():
    """Read rooms and students, nest students into their rooms, and write
    the result in the requested format."""
    args = parse_args()

    try:
        rooms = FileReader(args.rooms_path).read()
        students = FileReader(args.students_path).read()
    except FileNotFoundError as e:
        print(e)
        return

    # One StudentRoom per room, in file order — student["room"] indexes this list.
    student_rooms = [StudentRoom(Room(room["id"], room["name"]))
                     for room in rooms]
    for student in students:
        student_rooms[student["room"]].students.append(
            Student(student["id"], student["name"]))

    formats = {"json": JSONWriter(), "xml": XMLWriter()}
    formats[args.format].write(student_rooms)
Beispiel #34
0
def test_run(data_type, lower_and_remove_punctuation, remove_stop_words, distance_method):
    """
    Performs a test run, according to the given parameters
    :param data_type: Defines how to store the sentences, expects: 'boolean' / 'tf' / 'tfidf'
    :param lower_and_remove_punctuation: bool, if true turns all words to lower case and removes punctuation
    :param remove_stop_words: bool, if true removes all stop words
    :param distance_method: defines how to calculate distance, expects: 'euclidean' / 'cosine'
    :return: accuracy, the accuracy of the test run
    """
    full_file = "./dataset/amazon_cells_labelled_full.txt"
    train_file = "./dataset/amazon_cells_labelled_train.txt"
    test_file = "./dataset/amazon_cells_labelled_test.txt"

    # The full file seeds the vocabulary; train/test are built against it.
    reader = FileReader(full_file, lower_and_remove_punctuation, remove_stop_words)
    train_set, _ = reader.build_set(data_type, train_file)
    test_set, _ = reader.build_set(data_type, test_file)
    classifier = RocchioClassifier(train_set)
    return calc_accuracy(test_set, classifier, distance_method)
    def calcCounts(self):
        """Count label and per-feature value frequencies over the training rows.

        :returns: tuple (label_count, feature_count); label_count maps each
            label (plus 'total') to its row count, feature_count maps
            label -> feature index (str) -> value -> count (plus 'total').
        """
        # Missing labels not yet added.

        fr = FileReader(self._file)
        rows = fr.getRows()
        # Last column is the label; everything before it is a feature.
        self._num_features = len(rows[0])-1

        feature_count = {}
        label_count = {}
        label_count['total'] = 0

        for row in rows:
            label_count['total'] += 1
            if label_count.get(row[-1]) is None:
                label_count[row[-1]] = 1
            else:
                label_count[row[-1]] += 1

            # First occurrence of this label: initialise its feature tables.
            if feature_count.get(row[-1]) is None:
                feature_count[row[-1]] = {}
                for i in range(self._num_features):
                    feature_count[row[-1]][str(i)] = {}
                    feature_count[row[-1]][str(i)]['total'] = 0

            # Tally this row's value for each feature under its label.
            for i in range(self._num_features):
                feature_i = feature_count[row[-1]][str(i)]
                value = feature_i.get(row[i])
                feature_i[row[i]] = 1 if value is None else feature_i[row[i]]+1
                feature_count[row[-1]][str(i)]['total'] += 1

        # Backfill zero counts so each label's table covers every value seen
        # under any label — later lookups then never miss a key.
        for label in feature_count:
            for feature in feature_count[label]:
                feature_values = set()
                for l in filter(lambda x: x != 'total', feature_count):
                    for value in feature_count[l][feature]:
                        feature_values.add(value)

                for value in feature_values:
                    if feature_count[label][feature].get(value) is None:
                        feature_count[label][feature][value] = 0

        return(label_count, feature_count)
Beispiel #36
0
    def search_paths(self, search_path, kernel_file_name):
        """
        Search a given search path and sub paths.

        :param search_path: Path to search for kernel files.
        :param kernel_file_name: Name of kernel file.
        :returns: Content of kernel file ('' when found in neither location).
        :rtype: String.
        """
        # Try the generic kernels folder first.
        try:
            return FileReader.file_as_string(
                os.path.join(search_path, 'kernels'), kernel_file_name)
        except IOError:
            pass
        # Fall back to the measure-specific subfolder.
        try:
            return FileReader.file_as_string(
                os.path.join(search_path, 'kernels',
                             self.similarity_measure_name),
                kernel_file_name)
        except IOError:
            return ''
Beispiel #37
0
def main():
    """Cluster the WEPS training web pages, one person name at a time.

    For every name directory: read the page descriptions, extract and
    filter features from each ranked page's HTML, accumulate feature
    vectors into a PersonCorpus, then compute and dump its matrix.
    """
    # NOTE: 'traininig' is the literal (misspelled) directory name in the
    # dataset path — do not "fix" it.
    webpages_dir = os.path.join(util.ROOT, 'data/weps2007_data_1.1/traininig/web_pages')
    fe = FeatureExtractor()
    ff = FeatureFilter()
    for name in os.listdir(webpages_dir):
        # print() call form: the original Python 2 print statement is a
        # syntax error under Python 3; this works under both.
        print('begin clustering %s' % name)
        reader = FileReader(webpages_dir, name)
        description = reader.read_description()
        pc = PersonCorpus(name)
        fm = FeatureMapper()
        for rank in description:
            doc_meta = {}
            html_path = os.path.join(webpages_dir, name, 'raw', rank, 'index.html')
            content = text_extract(html_path)
            features, wordcount = fe.extract(content)
            doc_meta['word_num'] = wordcount
            good_features = ff.filter(features)
            vec = FeatureVector(good_features, fm)
            pc.add_vector(vec)
        pc.compute_matrix()
        pc.dump_matrix()
Beispiel #38
0
  def _read(self):
    """Read the input file in chunks and merge per-chunk results.

    Returns a tuple ``(cluster_indexes, distances, count)`` where
    ``cluster_indexes`` maps each key to an int64 numpy array of indexes,
    ``distances`` is the merged distance dict, and ``count`` is the total
    number of records read.
    """
    merged_indexes = {}
    merged_distances = {}
    total = 0
    started_at = time.time()
    reader = FileReader(self._input_file, 64, self._process)
    for chunk_indexes, chunk_distances, chunk_count in reader.read():
      merged_distances.update(chunk_distances)
      total += chunk_count
      for key, values in chunk_indexes.items():
        if key in merged_indexes:
          merged_indexes[key] = np.append(merged_indexes[key], values)
        else:
          merged_indexes[key] = np.array(values, dtype="int64")

    # Every record must contribute exactly one distance entry.
    assert total == len(merged_distances), \
      "ids count: {}, record count: {}".format(
        len(merged_distances), total)
    print("Read data done, read {} records, elapsed: {}".format(
      total, time.time() - started_at))
    return merged_indexes, merged_distances, total
Beispiel #39
0
class FileReaderTest(unittest.TestCase):
    """Unit tests for FileReader line validation, reading and iteration."""

    def setUp(self):
        # Fresh reader over the fixture file for every test.
        self.fr = FileReader('test_data')

    def test__check_line(self):
        """_check_line raises WrongData on malformed lines, accepts valid ones."""
        self.assertRaises(WrongData, self.fr._check_line, 'AUAGCx')
        self.assertRaises(WrongData, self.fr._check_line, 'aUGCA')
        self.assertRaises(WrongData, self.fr._check_line, 'A')
        try:
            self.fr._check_line('ACGCGCGCGCGCGAAAUUUU')
        except Exception:  # narrowed from bare `except:` so Ctrl-C/SystemExit still propagate
            self.fail()

    def test_get_one_line(self):
        """get_one_line returns the first line of the fixture."""
        line = self.fr.get_one_line()
        self.assertEqual(line, 'GCGCGCGC')

    def test_iteration(self):
        """Iterating the reader yields exactly three items."""
        count = sum(1 for _ in self.fr)
        self.assertEqual(count, 3)
Beispiel #40
0
import sys
from solver import Solver
from file_reader import FileReader

# Require exactly one argument: the puzzle file path.
if len(sys.argv) != 2:
    print("Please specify a puzzle file to solve.")
    sys.exit(0)

filename = sys.argv[1]
file_reader = FileReader()

puzzle = file_reader.get_puzzle_from_file(filename)

solver = Solver()

if solver.is_valid(puzzle):
    print('Okay, I can solve this.')
    solution = solver.solve(puzzle)

    # Each square's number is followed by a single space; print()
    # supplies the newline at the end of every row.
    for row in solution:
        print(''.join(str(square.number) + ' ' for square in row))
else:
    print('This puzzle is invalid.')
    
Beispiel #41
0
def main(argv):
  """Train a decision tree on a training file and evaluate it on a test file.

  Usage: run.py -t <trainfile> -e <testfile> -d <maxDepth>

  Prints the tree-building status, per-record predictions, the confusion
  matrix, the error count and the overall accuracy.

  :param argv: Command-line arguments (sys.argv[1:]).
  """
  setpath()
  try:
    opts, args = getopt.getopt(argv, "ht:e:d:", ["train=", "test=", "maxDepth="])
    # getopt does not enforce presence of every option; require all three
    # option/value pairs (program name + 6 tokens).
    if len(sys.argv) < 7:
      raise getopt.GetoptError(None)

  except getopt.GetoptError:
    # print() call form: the original Python 2 print statements are
    # syntax errors under Python 3.
    print('\nusage: run.py -t <trainfile> -e <testfile> -d <maxDepth>\n')
    sys.exit(2)
  for opt, arg in opts:
    if opt == '-h':
      print('run.py -t <trainfile> -e <testfile> -d <maxDepth>')
      sys.exit()
    elif opt in ("-t", "--train"):
      trainfile = arg
    elif opt in ("-e", "--test"):
      testfile = arg
    elif opt in ("-d", "--maxDepth"):
      maxDepth = int(arg)

  from file_reader import FileReader
  fr = FileReader(trainfile)

  from decision_tree_builder import DecisionTreeBuilder
  # getRows() returns a dataMatrix;
  dtb = DecisionTreeBuilder(fr.getRows())
  print('Features: {}'.format(fr.featureNames))
  root = dtb.build(maxDepth)
  print('Tree Building Complete and Successful')
  print('Height of the tree is {}'.format(dtb.decisionTree.height()))

  # Testing section

  # Create a zero-initialized square confusion matrix over the class labels.
  confusion_matrix = [[0 for _ in range(len(fr.getClassLabels()))]
                      for _ in range(len(fr.getClassLabels()))]
  # Read the test file.
  testFile_Reader = FileReader(testfile)
  dataMatrix_testFile = testFile_Reader.getRows()
  Error_Count = 0
  # Float total so the final division is true division in Python 2 as well.
  Total_records = len(dataMatrix_testFile) + 0.0
  # Testing phase: predict each row; the actual label is the last column.
  for row in dataMatrix_testFile:
    predicted_classLabel = dtb.predict(row)
    print('\tActual Label is {}, and Predicted Label is {}'.format(
      row[-1], predicted_classLabel))
    # confusion_matrix[int(row[len(row)-1])-1][int(predicted_classLabel)-1] += 1
    if row[-1] != predicted_classLabel:
      Error_Count += 1.0
  # NOTE(review): the confusion-matrix update is commented out above, so the
  # matrix printed here is all zeros.
  print('\n\n------------------Confusion Matrix----------')
  for row in confusion_matrix:
    print(row)

  print('\n\n--------------Error Count----------------')
  print(Error_Count)
  print('\n\n--------------Accuracy----------------')
  print((Total_records - Error_Count) / Total_records)
 def setUp(self):
     # Open the CSV fixture (with a header row) once per test.
     # TEST_FILENAME is presumably a class-level constant on the test
     # case — not visible in this chunk; confirm against the full class.
     self._csv_file = FileReader(self.TEST_FILENAME, has_header=True)
Beispiel #43
0
 def setUp(self):
     # Build a fresh reader over the 'test_data' fixture for each test.
     self.fr = FileReader('test_data') 
Beispiel #44
0
    def __init__(self, year=YEAR, quarter=QUARTER):
        """Initialize the object for a given filing period and build the
        company dictionary.

        :param year: Filing year; defaults to the module-level YEAR.
        :param quarter: Filing quarter; defaults to the module-level QUARTER.
        """
        # The original mixed tab- and space-indented lines here, which is
        # a TabError under Python 3; indentation normalized to spaces.
        self.year = year
        self.quarter = quarter
        self.fr = FileReader()
        # self.download_index_file()
        self.company_dictionary = self.build_dictionary()