def test_hello():
    """Check the greetings produced by ``Lab1.hello`` for two sample names."""
    cases = (
        ('World', 'Hello World!'),
        # Sneaky: the first letter needs to be capitalized in the output.
        ('you', 'Hello You!'),
    )
    for name, expected in cases:
        greeting = Lab1.hello(name)
        assert greeting == expected, f'"{greeting}" != "{expected}"'
def main():
    """Run the interactive query loop: spell-correct, search, print snippets.

    Each round reads a query, lower-cases it, replaces every token that is
    missing from the dictionary with the result of ``correct_word``, runs the
    corrected query through ``Lab1.query_run`` and prints one snippet per
    returned entry. The loop never terminates on its own.
    """
    stop_set = Lab1.get_stopwords()
    dict_set = get_dict()
    sessions = get_sessions()

    while True:
        raw_query = input('Gimme yo Query: ')
        tokenized_query = [
            token if token in dict_set
            else correct_word(token, stop_set, dict_set, sessions)
            for token in raw_query.lower().split(' ')
        ]

        # Rejoin the query; the echoed form keeps its trailing space, the
        # form handed to the search engine does not.
        spaced_query = ''.join(token + ' ' for token in tokenized_query)
        print("New Query: " + spaced_query)
        new_query = spaced_query[:-1]

        best_5, _doc_indices = Lab1.query_run(new_query)
        for entry in best_5:
            snippets = generate_snippet(entry.file, tokenized_query, stop_set)
            # Only the final space is trimmed; the last '.' stays.
            to_print = ''.join(snippet[0] + '. ' for snippet in snippets)[:-1]
            print("\nFile Name: " + entry.file)
            highlighted = bold_snippet(to_print, tokenized_query)
            print("Snippet: \n\t" + highlighted + '\n')
def test_roll_array():
    """Check ``Lab1.roll_array`` for a shift of 2 to the right and 4 to the left."""
    source = np.arange(10)

    rolled_right = Lab1.roll_array(source, 2, 'right')
    expected_right = np.asarray([8, 9, 0, 1, 2, 3, 4, 5, 6, 7])
    assert np.array_equal(rolled_right, expected_right)

    rolled_left = Lab1.roll_array(source, 4, 'left')
    expected_left = np.asarray([4, 5, 6, 7, 8, 9, 0, 1, 2, 3])
    assert np.array_equal(rolled_left, expected_left)
def onCLicked_Plot(self):
    """Plot the coordinates of the ID currently selected in ``self.ids``.

    The digits of the selected text form the ID. Depending on the global
    ``process`` the coordinates come from ``Lab1.Filtering_Data`` ("csv") or
    from the ``TAB`` table of the global ``connection`` ("sql"). The ID is
    then recorded in the global ``plotted`` list, added to ``self.tb_id`` and
    drawn on ``self.graphPlot`` in a random colour.
    """
    global df_filtered, id_str, plotted

    selected_text = self.ids.currentText()
    self.statusBar().showMessage('Plotting ID: ' + str(selected_text + ' ...'))

    color = list(np.random.choice(range(256), size=3))
    pen = pg.mkPen(color=color, width=2)
    brush = pg.mkBrush(color=color)

    # Keep only the digits of the combo-box text.
    id_str = "".join(ch for ch in selected_text if ch.isdigit())

    X = []
    Y = []
    if process == "csv":
        X, Y, df_filtered = Lab1.Filtering_Data(df, id_str)
    elif process == "sql":
        cursor = connection.cursor()
        # One parameterized query: the value is bound instead of being
        # spliced into the SQL text, and latitude/longitude come from the
        # same row, so the pairs cannot drift apart between two result sets.
        cursor.execute(
            "SELECT latitude, longitude FROM TAB WHERE id LIKE ?",
            (id_str,),
        )
        rows = cursor.fetchall()
        X = [row[0] for row in rows]
        Y = [row[1] for row in rows]

    plotted.append(id_str)
    self.tb_id.addItem(id_str)
    self.tb_id.setCurrentIndex(self.tb_id.findData(id_str))
    self.graphPlot.plot(X, Y, pen=pen, symbol='o', symbolSize=4, symbolBrush=brush)
    self.statusBar().showMessage('Ready.')
def get_prior(suggested, stop_set):
    """Return the relative frequency of ``suggested`` in the text collection.

    Every ``To_be_posted/*.txt`` file is tokenized with
    ``Lab1.tokenize_file(file, stop_set)``; the result is the number of
    tokens equal to ``suggested`` divided by the total number of tokens.

    Returns 0.0 when no files match or the files yield no tokens, instead
    of raising ``ZeroDivisionError``.
    """
    occur = 0
    total = 0
    for path in glob.glob('To_be_posted/*.txt'):
        tokenized = Lab1.tokenize_file(path, stop_set)
        total += len(tokenized)
        occur += tokenized.count(suggested)

    if total == 0:
        # Nothing was counted, so the word cannot have been observed.
        return 0.0
    return occur / total
def onChangedVal(self):
    """Reload ``df_filtered`` for the ID shown in ``self.tb_id`` and refill the table.

    Does nothing while the combo box text is empty. In "csv" mode the data
    comes from ``Lab1.Filtering_Data(..., mode=True)``; in "sql" mode from
    the ``TAB`` table of the global ``connection``.
    """
    global df_filtered

    current_id = self.tb_id.currentText()
    if current_id == '':
        return

    if process == "csv":
        df_filtered = Lab1.Filtering_Data(df, current_id, mode=True)
    elif process == "sql":
        cursor = connection.cursor()
        # Bind the widget text as a parameter rather than concatenating it
        # into the statement.
        cursor.execute("SELECT * FROM TAB WHERE id LIKE ?", (current_id,))
        df_filtered = cursor.fetchall()

    # NOTE(review): the refresh is assumed to run only when an ID is
    # selected; confirm against the original indentation.
    self.TableFill()
def ex3_check():
    """Show figure 3: the curves labelled f(x) and cg(x), and a sample histogram.

    Both curves are evaluated on a grid over [-2, 2) with step 0.0001 via
    ``Lab1.apply_for_all``. The histogram (40 bins) shows the numbers
    returned by ``generate_random_numbers`` for ``ex3``.
    """
    scale = 2
    grid = arange(-2, 2, 0.0001)
    f_values = Lab1.apply_for_all(grid, ex3.f)
    cg_values = Lab1.apply_for_all(grid, ex3.g, scale)
    samples = generate_random_numbers(ex3, scale, nr_of_samples)

    pyplot.figure(3)

    # Upper panel: both curves, with the legend placed above the axes.
    density_axes = pyplot.subplot(2, 1, 1)
    density_axes.set_title('Gęstość rozkladu')
    pyplot.plot(grid, cg_values, 'o', label='cg(x)')
    pyplot.plot(grid, f_values, '*', label='f(x)')
    pyplot.legend(
        bbox_to_anchor=(0., 1.02, 1., .102),
        loc='lower left',
        ncol=2,
        mode="expand",
        borderaxespad=0.,
    )

    # Lower panel: histogram of the generated numbers.
    histogram_axes = pyplot.subplot(2, 1, 2)
    histogram_axes.set_title('Histogram wygenerowanych liczb')
    pyplot.hist(samples, 40)

    pyplot.show()
def test_NNClassifier():
    """Fit the 1-NN classifier and require an accuracy of at least 0.75."""
    feature_columns = ['height', 'width']

    # Training data uses the generator's defaults.
    train = Lab1.generate_data()
    x_train, y_train = train[feature_columns], train['label']

    # A different random state gives different test data.
    test = Lab1.generate_data(random_state=43)
    x_test, y_test = test[feature_columns], test['label']

    classifier = Lab1.OneNearestNeighborClassifier()
    classifier.fit(x_train, y_train)
    prediction = classifier.predict(x_test)

    hits = np.sum(y_test == prediction)
    accuracy = hits / len(y_test)
    target_accuracy = 0.75
    assert accuracy >= target_accuracy, f'Accuracy {accuracy} != {target_accuracy}'
def test_NNC_imbalanced():
    """Fit on default data, test on data with ``proportion=0.3``; require accuracy >= 0.65."""
    feature_columns = ['height', 'width']

    # Training data uses the generator's defaults.
    train = Lab1.generate_data()
    x_train, y_train = train[feature_columns], train['label']

    # Switch to class b as the most common one in the test data.
    test = Lab1.generate_data(random_state=43, proportion=0.3)
    x_test, y_test = test[feature_columns], test['label']

    clf = Lab1.OneNearestNeighborClassifier()
    clf.fit(x_train, y_train)
    prediction = clf.predict(x_test)

    hits = np.sum(y_test == prediction)
    accuracy = hits / len(y_test)
    target_accuracy = 0.65
    assert accuracy >= target_accuracy, f'Accuracy {accuracy} != {target_accuracy}'
def t1():
    """Print what ``Lab1.read_airports`` returns for the two sample airport files."""
    divider = '-' * 20
    print('{}'.format(divider))
    print('t1: Testing read_airports:\n')

    for filename in ('airports1.txt', 'airports2.txt'):
        airport_dict, province_set = Lab1.read_airports(filename)
        print('Airport Dictionary:')
        print(airport_dict)
        print('Province Set:')
        print(province_set)
        print()

    print('End of t1 testing')
    print('{}\n'.format(divider))
def test_surface_class():
    """Check the labels ``Lab1.surface_class`` returns for several second-argument values."""
    df = pd.DataFrame({
        'height': [1, 2, 3, 4, 5],
        'width': [5, 4, 8, 10, 1],
        'class_labels': ['a', 'b', 'c', 'a', 'a'],
    })

    # (second argument passed to surface_class, labels expected back)
    cases = (
        (8, ['b', 'c', 'a']),
        (9, ['c', 'a']),
        (24, ['c', 'a']),
        (25, ['a']),
    )
    for threshold, expected in cases:
        labels = np.asarray(Lab1.surface_class(df, threshold))
        target = np.asarray(expected)
        assert np.array_equal(labels, target), f"{labels} != {target}"
def importCSV(self):
    """Ask the user for a dataset file and load it as CSV or SQLite.

    For ``.csv`` files the user may type a separator; when the dialog is
    cancelled or left empty the delimiter is sniffed from the first 1024
    characters. For SQLite files every row of ``TAB`` is loaded and the IDs
    come from ``SELECT DISTINCT nom FROM TAB2``. In both cases the sorted
    IDs are added to ``self.ids`` and the globals ``df``, ``process``,
    ``connection`` (SQLite only) and ``ids`` are updated.

    If the SQLite database cannot be opened the error is printed, shown in
    the status bar, and the import is abandoned; previously the code went
    on to call ``.cursor()`` on ``None``.
    """
    global df, process, connection, ids

    sqlite_suffixes = (
        ".db", ".sdb", ".sqlite", ".db3", ".s3db",
        ".sqlite3", ".db2", ".s2db", ".sqlite2", ".sl2",
    )

    file = QtWidgets.QFileDialog.getOpenFileName(
        QtWidgets.QFileDialog(),
        'Select dataset',
        os.getcwd(),
        "Comma-separated values (*.csv) ;; SQlite database (*.db *.sdb *.sqlite *.db3 *.s3db *.sqlite3 *.db2 *.s2db *.sqlite2 *.sl2)"
    )
    path = file[0]
    self.statusBar().showMessage('Importing ' + str(path) + ', please wait...')

    if path.endswith(".csv"):
        process = 'csv'
        sep, ok = QtWidgets.QInputDialog.getText(self, 'CSV separator', 'Enter separator:')
        if ok and sep != '':
            delimiter = sep
        else:
            # No separator given: let the csv module guess it.
            with open(path) as csvFile:
                delimiter = csv.Sniffer().sniff(csvFile.read(1024)).delimiter
        df = pd.read_csv(path, sep=delimiter)
        ids = Lab1.getIDs(df)
        for item_id in sorted(ids):
            self.ids.addItem(str(item_id))
    elif path.endswith(sqlite_suffixes):
        connection = None
        try:
            connection = sqlite3.connect(path)
        except sqlite3.Error as e:
            print(e)
            self.statusBar().showMessage('Could not open ' + str(path))
            return
        process = 'sql'
        cursor = connection.cursor()
        cursor.execute("SELECT * FROM TAB")
        df = cursor.fetchall()
        cursor.execute("SELECT DISTINCT nom FROM TAB2")
        ids = cursor.fetchall()
        for row in sorted(ids):
            # Each row is a one-column tuple holding the ID.
            self.ids.addItem(str(row[0]))
def t2():
    """Print the two sample dictionaries and the result of ``Lab1.query_airports_DB``.

    The province dictionary previously listed 'Saskatchewan' and
    'Nova Scotia' twice with the same values; the repeated entries are
    removed, which leaves the resulting dictionary unchanged.
    """
    divider = '-' * 20
    print('{}'.format(divider))
    print('t2: Testing query_airports_DB:\n')

    code_name_dict = {
        'YLW': 'Kelowna Airport',
        'YQB': 'Quebec City Jean Lesage Airport',
        'YQG': 'Windsor Airport',
        'YVR': 'Vancouver Airport',
        'YAD': 'Moose Lake Airport',
        'YMX': 'Montreal Mirabel Airport',
        'YXE': 'Saskatoon John Diefenbaker Airport',
        'YHC': 'Halifax Stanfield Airport',
        'YRB': 'Resolute Bay Airport',
        'YKF': 'Region of Waterloo Airport',
    }
    prov_code_dict = {
        'British Columbia': 'YLW YVR',
        'Quebec': 'YQB YMX',
        'Ontario': 'YQG YKF',
        'Manitoba': 'YAD',
        'Saskatchewan': 'YXE',
        'Nova Scotia': 'YHC',
        'Nunavut': 'YRB',
    }

    print('code_name_dict = ')
    for item in code_name_dict.items():
        print(item)
    print()

    print('prov_code_dict = ')
    for item in prov_code_dict.items():
        print(item)
    print()

    prov_name_dict = Lab1.query_airports_DB(code_name_dict, prov_code_dict)
    print('prov_name_dict:')
    for item in prov_name_dict.items():
        print(item)
    print()

    print('End of t2 testing')
    print('{}\n'.format(divider))
def get_statistic(self):
    """Refresh all statistic plots and the text output from the data table.

    When the first table cell is empty, 15 values are drawn from
    ``self.default_set``; otherwise the table content is used. Fewer than
    two values is reported as 'Not enough data'. Any exception is shown in
    ``self.output`` instead of propagating.
    """
    try:
        if not self.data.item(0, 0):
            stat_data_set = choices(self.default_set, k=15)  # first task
        else:
            stat_data_set = table_to_list(self.data)
        if len(stat_data_set) < 2:
            raise IOError('Not enough data')

        self.poligon.update_figure(stat_data_set, 'freq_pol')
        self.empiric.update_figure(stat_data_set, 'empiric')
        self.cumulate.update_figure(stat_data_set, 'cumulate')
        self.cumulate_rel.update_figure(stat_data_set, 'cumulate_relative')
        self.freq_pol_rel.update_figure(stat_data_set, 'freq_pol_relative')
        self.output.setText(str(Lab1.get_data_set(stat_data_set)))
        self.update()
        self.updateGeometry()
    except Exception as error:
        # args may be empty or hold a non-string, so the handler must not
        # index or pass it through blindly; always hand setText a string.
        if error.args:
            message = str(error.args[0])
        else:
            message = type(error).__name__
        self.output.setText(message)
def test_plus_one():
    """Check ``Lab1.plus_one`` on two integers and that a string raises ``TypeError``."""
    for given, expected in ((1, 2), (10, 11)):
        assert Lab1.plus_one(given) == expected

    with pytest.raises(TypeError):
        Lab1.plus_one('two')
def test_exercise_1():
    """Compare ``Lab1.count`` on the sample sequence with the stored answer."""
    sequence = "ACAACTATGCATACTATCGGGAACTATCCT"
    pattern = "ACTAT"
    observed = Lab1.count(sequence, pattern)
    assert observed == answers['answer_exercise_1a']
def test_exercise_2():
    """Compare ``Lab1.frequent_words`` (second argument 4) with the stored answer."""
    sequence = "ACAACTATGCATACTATCGGGAACTATCCT"
    observed = Lab1.frequent_words(sequence, 4)
    assert observed == answers['answer_exercise_2a']
def test_question_1():
    """Compare ``Lab1.answer_question_1()`` with the stored answer."""
    observed = Lab1.answer_question_1()
    assert observed == answers['answer_question_1']
def test_exercise_3():
    """Compare ``Lab1.reverse_complement("cagt")`` with the stored answer."""
    observed = Lab1.reverse_complement("cagt")
    assert observed == answers['answer_exercise_3']
def test_exercise_4():
    """Compare ``Lab1.frequency_table`` on ``Lab1.text`` (second argument 3) with the stored answer."""
    observed = Lab1.frequency_table(Lab1.text, 3)
    assert observed == answers["answer_exercise_4"]
def __add__(self, other):
    """Return a new ``UnsignedBinaryInteger`` holding ``self + other``.

    The sum is computed by ``Lab1.add_binary`` on the two ``data``
    attributes.

    Raises:
        ValueError: if ``other`` is not an ``UnsignedBinaryInteger``.
    """
    if not isinstance(other, UnsignedBinaryInteger):
        # Exception type kept for existing callers; only the misspelled
        # type name in the message is corrected.
        raise ValueError("Must be UnsignedBinaryInteger type")
    return UnsignedBinaryInteger(Lab1.add_binary(self.data, other.data))
def generate_X(self):
    """Return the first number generated for ``Lab1.Ex4(1, 0)``.

    The keys passed to ``Lab1.Generator.generate_random_numbers`` come from
    ``sgen.generate_random_numbers(time(), 1)``.
    """
    keys = sgen.generate_random_numbers(time(), 1)
    source = Lab1.Ex4(1, 0)
    generated = Lab1.Generator.generate_random_numbers(keys, source)
    return generated[0]
def g(self, x):
    """Return ``f(x)`` of a freshly built ``Lab1.Ex4(1, 0)``."""
    reference = Lab1.Ex4(1, 0)
    return reference.f(x)
def D_N(empiric_dist, space, ex=Lab1.Ex1()):
    """Return the largest absolute gap between ``empiric_dist`` and ``ex.F``.

    For every index ``i`` of ``empiric_dist`` the gap is
    ``abs(empiric_dist[i] - ex.F(space[i]))``; the maximum gap is returned.
    The default ``ex`` is a single ``Lab1.Ex1()`` built when the function is
    defined.
    """
    gaps = [
        abs(empiric_dist[index] - ex.F(space[index]))
        for index in range(len(empiric_dist))
    ]
    return max(gaps)
# Disabled experiment, kept for reference:
# F_real = [Lab1.Ex1().F(x) for x in space]
# for nr_of_samples in n:
#
#     F_empiric = empiric_distribution(randoms[0:nr_of_samples], space)
#
#     D.append(D_N(F_empiric,space))
#
# pyplot.plot(n,D,'r*')
#
# pyplot.xlabel('N')
# pyplot.ylabel('D(N)')
# pyplot.plot(space, F_real, 'o')
# pyplot.plot(space, F_empiric, '*')

# Generate 500 numbers for Lab1.Ex1 from uniform keys in [0, 1] and compute
# their empirical distribution over `space`.
nr_of_samples = 500
keys = [random.uniform(0, 1) for _ in range(nr_of_samples)]
randoms = Lab1.Generator.generate_random_numbers(keys, Lab1.Ex1())
F_empiric = empiric_distribution(randoms, space)

# Two stacked panels, titled before anything is drawn.
fig = pyplot.figure()
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)
ax1.title.set_text(r'Dystrybuanta empiryczna')
ax2.title.set_text(r'Estymator wariancji')

# Upper panel: the empirical distribution.
pyplot.subplot(2, 1, 1)
pyplot.plot(space, F_empiric)

# Lower panel: result of empiric_variance against Ex1's F.
pyplot.subplot(2, 1, 2)
variance_curve = empiric_variance(F_empiric, Lab1.Ex1().F, space)
pyplot.plot(space, variance_curve)

pyplot.show()
def test_exercise_6():
    """Compare ``Lab1.skew`` of ``Lab1.genome`` with the stored answer."""
    observed = Lab1.skew(Lab1.genome)
    assert observed == answers["answer_exercise_6"]
import Lab1

# Call the three Lab1 readers one after another. Each attribute is looked up
# only when its turn comes, exactly as with three separate statements.
for reader_name in ("read_csv", "read_webpage", "read_xls"):
    getattr(Lab1, reader_name)()
def test_exercise_5():
    """Compare ``Lab1.better_frequent_words`` on ``Lab1.text`` (second argument 9) with the stored answer."""
    observed = Lab1.better_frequent_words(Lab1.text, 9)
    assert observed == answers["answer_exercise_5"]
import Lab1
import Lab2
import Lab3
import Lab4

EXIT_CHOICE = 5

# Menu number -> (banner printed before the run, call that starts the lab).
# Lambdas defer the attribute lookup until the lab is actually chosen.
# NOTE(review): lab 1 uses "l1_1.txt" (underscore) while the others use a
# hyphen; kept as found, confirm the file name.
LAB_RUNNERS = {
    1: ("\nLab 1 starting ...\n", lambda: Lab1.lab1("l1_1.txt")),
    2: ("\nLab 2 starting ...\n", lambda: Lab2.lab2("l2-1.txt")),
    3: ("\nLab 3 starting ...\n", lambda: Lab3.lab3("l3-1.txt")),
    4: ("\nLab 4 starting ...\n", lambda: Lab4.lab4("l4-1.txt")),
}


def _read_lab_number():
    """Prompt for a laboratory number; return it as int, or None if not numeric."""
    try:
        return int(input("Enter the laboratory number: "))
    except ValueError:
        # Non-numeric input is treated like any other wrong number instead
        # of crashing the menu.
        return None


while True:
    print(
        "Which lab you want to run:\n1. Lab 1\n2. Lab 2\n3. Lab 3\n4. Lab 4\n5. Exit"
    )
    num = _read_lab_number()
    if num != EXIT_CHOICE and num not in LAB_RUNNERS:
        # One immediate retry; a second wrong answer shows the menu again.
        print("You enter wrong laboratory number!")
        num = _read_lab_number()

    if num == EXIT_CHOICE:
        break
    if num not in LAB_RUNNERS:
        continue

    banner, run_lab = LAB_RUNNERS[num]
    print(banner)
    run_lab()
    print("\nend\n")
import Lab1

# (dividend, divisor, report template) for each demonstration.
DEMOS = (
    (103, 10, "103/10 --- Quotient: {0} Remainder: {1}"),
    (50, 16, "50/16 --- Quotient: {0} Remainder: {1}"),
    (72, 9, "72/9 --- Quotient: {0} Remainder: {1}"),
)

# Run every division first, then print, so all divAlg calls still happen
# before any report line is written.
outcomes = [Lab1.divAlg(dividend, divisor) for dividend, divisor, _ in DEMOS]

for (_, _, template), outcome in zip(DEMOS, outcomes):
    quotient = outcome[0]
    remainder = outcome[1]
    print(template.format(quotient, remainder))