def main():
    """Interactive menu loop: Huffman-compress, decompress, or exit.

    Relies on the project classes Frequency, Huffman, Encode and Decode
    being importable in this module's scope.
    """
    while True:
        print("1.Compress\n2.Decompress\n3.Exit")
        # Bug fix: a non-numeric entry used to raise ValueError and crash
        # the whole menu; treat it as an invalid choice instead.
        try:
            choice = int(input("Enter Choice :"))
        except ValueError:
            print("Invalid Choice\n")
            continue
        if choice == 1:
            fileName = input("Enter file Name: ")
            print("Encoding : ", fileName)
            frequencyObject = Frequency(fileName)
            frequencyTable = frequencyObject.frequencyTable()
            huffmanObject = Huffman(frequencyTable)
            huffmanCodes = huffmanObject.huffman()
            encodeObject = Encode(huffmanCodes, fileName)
            encodeObject.encode()
            print("File Encoded as:" + fileName + ".bv\n\n")
        elif choice == 2:
            fileName = input("Enter file Name: ")
            print("decoding : ", fileName)
            decodeObject = Decode(fileName)
            decodeObject.decode()
            print("\nDecoded as " + fileName + "_new.txt")
        elif choice == 3:
            print("Bye\n")
            return
        else:
            print("Invalid Choice\n")
def import_gtfs(cls, directory):
    """Load every GTFS entity table from *directory*, in dependency order."""
    loaders = (
        Agency.import_agencies,
        Calendar.import_calendars,
        Stop.import_stops,
        Path.import_paths,
        Route.import_routes,
        Trip.import_trips,
        Frequency.import_frequencies,
    )
    for load in loaders:
        load(directory)
def export_gtfs(cls, directory):
    """Write every GTFS entity table into *directory*."""
    writers = (
        Agency.write_agencies,
        Calendar.write_calendars,
        Stop.write_stops,
        Route.write_routes,
        Trip.write_trips,
        Frequency.write_frequencies,
        Path.write_paths,
    )
    for write in writers:
        write(directory)
def close(self):
    """Mark the database closed and drop all cached model state."""
    self._is_open = False
    self._dbname = None
    # Wipe every in-memory model registry.
    for registry in (Agency, Calendar, Stop, Path, Route,
                     TripRoute, Trip, Frequency, Picture):
        registry.clear()
def __init__(self, N):
    """Remember the corpus size N and set up a fresh Frequency helper."""
    self.noOfDocs = N
    self.frequency = Frequency()
class VectorSpaceModel():
    """Rank documents against a free-text query using tf-idf cosine similarity."""

    def __init__(self, N):
        # Frequency supplies the dictionary, idf values and term frequencies.
        self.frequency = Frequency()
        self.noOfDocs = N

    def userInterface(self):
        """Simple REPL: keep prompting until the user enters '0'."""
        op = '1'
        while (op != '0'):
            print("vector Space Model")
            print("-----------------------------")
            print("1. Execute Query")
            print("0. Exit")
            op = input("Enter input: ")
            self.inputQuery(op)

    def inputQuery(self, op):
        """Handle one menu selection; only '1' executes a query."""
        if op == '1':
            query = input("Enter query: ")
            queryArr = query.split(" ")
            self.data = self.createTable()
            qVector = self.getVector(queryArr)
            docVectors = self.getDocumentVectors()
            rankings = self.generateRankings(docVectors, qVector)
            print(self.formatRankings(rankings))
        else:
            return

    def formatRankings(self, rankings):
        """Drop near-zero similarities and sort best-first."""
        rankings = rankings.loc[rankings['sim'] > 0.005]
        return rankings.sort_values(by=['sim'], ascending=False)

    def generateRankings(self, docs, q):
        """Return a DataFrame of (document file name, similarity to query q)."""
        rankings = pd.DataFrame({
            'docs': [str(x) + '.txt' for x in range(1, self.noOfDocs + 1)],
            'sim': [self.sim(docs[i], q) for i in range(1, self.noOfDocs + 1)]
        })
        return rankings

    def sim(self, d, q):
        """Cosine similarity between vectors d and q.

        Bug fix: returns 0.0 when either vector has zero norm (e.g. a query
        containing no dictionary terms) instead of raising ZeroDivisionError.
        """
        x = np.array(d)
        y = np.array(q)
        modX = sum(x * x)**0.5
        modY = sum(y * y)**0.5
        if modX == 0 or modY == 0:
            return 0.0
        return sum(x * y) / (modX * modY)

    def createTable(self):
        """Return {word: idf} for the whole collection."""
        self.frequency.loadDocuments()
        self.frequency.buildDictionary()
        keys = self.frequency.getWords()
        values = self.frequency.getIdf(self.noOfDocs)
        data = dict(zip(keys, values))
        return data

    def getVector(self, array, docId=0):
        """tf-idf vector of *array* over the dictionary order of self.data.

        docId == 0 means *array* is the query; otherwise term frequencies
        come from document *docId*.
        """
        vector = []
        for word, idf in self.data.items():
            if word in array:
                if (docId == 0):
                    tf = self.getQueryFrequency(array)[word]
                else:
                    tf = self.frequency.getTermFrequency(word)[docId]
                vector.append(self.tf_idf(tf, idf))
            else:
                vector.append(0)
        return vector

    def getDocumentVectors(self):
        """Return {docId: tf-idf vector} for documents 1..noOfDocs."""
        docVectors = {}
        docId = 1
        for i in range(self.noOfDocs):
            doc = self.frequency.collection[i]
            docVectors[docId] = self.getVector(doc, docId)
            docId += 1
        return docVectors

    def getQueryFrequency(self, queryArr):
        """Raw term counts of the query words."""
        tf = {}
        for q in queryArr:
            if q not in tf:
                tf[q] = 1
            else:
                tf[q] = tf[q] + 1
        return tf

    def tf_idf(self, tf, idf):
        """Plain tf * idf weighting."""
        return tf * idf
def decode():
    """Break a Caesar cipher by letter-frequency analysis.

    Reads Util.ENCODED_FILE, compares its letter distribution against the
    reference distribution loaded from Util.FILE_NAME, lets the user pick
    among candidate shift keys, then writes the decrypted text to
    Util.DECODED_FILE.
    """
    standard_frequency = Frequency.get(Util.FILE_NAME)
    text_frequency = standard_frequency.fromkeys(standard_frequency.keys(), float(0))

    # Tally letter frequencies of the encoded text, case-folded to lowercase.
    fh = open(Util.ENCODED_FILE, "r")
    numchar = 0
    c = fh.read(1)
    while c != '':
        o = ord(c)
        # Bug fix: the original tests used range(65, 90) / range(97, 122),
        # whose exclusive upper bounds silently skipped 'Z' (90) and 'z' (122).
        if 65 <= o <= 90:
            o = o + 32
        if 97 <= o <= 122:
            c = chr(o)
            text_frequency[c] = text_frequency[c] + 1
            numchar = numchar + 1
        c = fh.read(1)
    fh.close()

    # Convert raw counts to percentages.
    for f in text_frequency:
        text_frequency[f] = 100 * text_frequency[f] / numchar

    sorted_standard_frequency = sorted(standard_frequency.items(),
                                       key=operator.itemgetter(1), reverse=True)
    sorted_text_frequency = sorted(text_frequency.items(),
                                   key=operator.itemgetter(1), reverse=True)

    keys = []
    list_index = 0
    index_key = 0
    while index_key == 0:
        # Candidate key: shift between the most common reference letter and
        # the next-most-common ciphertext letter not yet tried.
        standard_character, standard_value = sorted_standard_frequency[0]
        text_character, text_value = sorted_text_frequency[list_index]
        list_index = list_index + 1
        key_value = ord(text_character) - ord(standard_character)
        if key_value < 0:
            key_value = key_value + 26
        found_key = False
        for key, key_confidence in keys:
            if key_value == key:
                found_key = True
        if not found_key:
            keys.append((key_value, 0))
        key_value, key_confidence = keys[-1]

        # Confidence: sum of the frequency mass this key maps onto the
        # matching rank in the reference distribution.
        for f in sorted_text_frequency:
            text_character, text_value = f
            decodedKey = ord(text_character) - key_value
            if decodedKey < 97:
                decodedKey = decodedKey + 26
            standard_character, standard_value = \
                sorted_standard_frequency[sorted_text_frequency.index(f)]
            if (standard_character == chr(decodedKey)):
                key_confidence = key_confidence + text_value
        keys[-1] = (key_value, key_confidence)

        for k in keys:
            value, confidence = k
            # Modernized from the Python 2 print statement (same spacing).
            print(keys.index(k) + 1, ". Encryption key: ", value,
                  ", confidence value: ", confidence, ".")

        while True:
            # Modernized from raw_input() for Python 3 consistency.
            input_key = input("Select a key or type 0 to calculate another key -> ")
            try:
                index_key = int(input_key)
                if (index_key >= 1) and (index_key <= len(keys)):
                    key_value, key_confidence = keys[index_key - 1]
                    break
                else:
                    if (index_key == 0) and (list_index < len(sorted_standard_frequency)):
                        print("Calculating another key...")
                        break
                    else:
                        if index_key == 0:
                            print("All keys have been already generated!")
                        else:
                            print("Key index incorrect!")
            except Exception:
                print("Key index incorrect, please insert a number!")

    # Shift every letter back by the chosen key, preserving case; other
    # characters pass through unchanged.
    fh = open(Util.ENCODED_FILE, "r")
    decrypted = open(Util.DECODED_FILE, "w")
    encoded_character = fh.read(1)
    while encoded_character != '':
        ascii = ord(encoded_character)
        decoded_character = ascii
        if (65 <= ascii <= 90) or (97 <= ascii <= 122):
            decoded_character = ascii - key_value
            if decoded_character < 65:
                decoded_character = decoded_character + 26
            if decoded_character < 97 and (97 <= ascii <= 122):
                decoded_character = decoded_character + 26
        decrypted.write(chr(int(decoded_character)))
        encoded_character = fh.read(1)
    decrypted.close()
    fh.close()
# !mwd - no need to set previous blocks # since setting the next block automatically # sets previously blocks #try: # previous_block = Trip.get(int(previous_trip)) # trip.previous_block = previous_block #except Exception, e: pass try: next_block = Trip.get(int(next_trip)) trip.next_block = next_block except Exception, e: pass for frequency_node in tree.getroot().findall('Frequency'): frequency_id = frequency_node.get('id', Frequency.new_id()) gtfs_id = frequency_node.get('gtfs_id', None) trip_route_id = frequency_node.findtext('trip_route_id') start = frequency_node.findtext('start') end = frequency_node.findtext('end') headway = frequency_node.findtext('headway') trip_route = TripRoute.get(int(trip_route_id)) if trip_route is None: print "Missing trip route id ", trip_route_id print "for frequency id ", frequency_id continue frequency = trip_route.add_frequency(start, end, headway) frequency.frequency_id = int(frequency_id)
# -*- coding:utf-8 -*- __author__ = 'Kusamura' from Data import Data from InvertedIndex import InvertedIndex from Search import Search from Frequency import Frequency if __name__ == '__main__': fileList = [] #章の一覧 for line in open('chaps/chap_title.tsv', 'r'): #chaps_titleから章の一覧を生成 fileList.append(line[:-1].split('\t')[0][2:]) dataList = [] #Dataクラス(ファイルから読み込んだデータ)のリスト for fileName in fileList: dataList.append(Data(fileName)) index = InvertedIndex(dataList) #転置インデックスを作成 # module = Search(index) #検索モジュール # keys = ['retrieval', 'half-a-trillion', 'thus', 'layer', 'test', 'hoge', 'hogehoge'] # for key in keys: # module.do(key) # print index.countKeys() #辞書内の語数をカウント # print Frequency().docFrequency(index) #辞書頻度 print Frequency().termFrequency(dataList) #単語頻度