Beispiel #1
0
def main():
    """Run the interactive compress/decompress menu until the user exits.

    Each pass prints the menu and reads a numeric choice:

    1. asks for a file name, builds a frequency table for it, derives the
       Huffman codes from that table and encodes the file;
    2. asks for a file name and decodes it;
    3. prints a farewell and returns.

    Any other input, including text that is not an integer, is reported as
    an invalid choice and the menu is shown again.
    """
    while True:
        print("1.Compress\n2.Decompress\n3.Exit")
        try:
            choice = int(input("Enter Choice :"))
        except ValueError:
            # Non-numeric (or empty) input must not crash the menu loop;
            # handle it exactly like an out-of-range number.
            print("Invalid Choice\n")
            continue

        if choice == 1:
            fileName = input("Enter file Name: ")
            print("Encoding : ", fileName)
            frequencyTable = Frequency(fileName).frequencyTable()
            huffmanCodes = Huffman(frequencyTable).huffman()
            Encode(huffmanCodes, fileName).encode()
            print("File Encoded as:" + fileName + ".bv\n\n")

        elif choice == 2:
            fileName = input("Enter file Name: ")
            print("decoding : ", fileName)
            Decode(fileName).decode()
            print("\nDecoded as " + fileName + "_new.txt")

        elif choice == 3:
            print("Bye\n")
            return

        else:
            print("Invalid Choice\n")
Beispiel #2
0
 def import_gtfs(cls, directory):
     """Import every entity type by delegating to the model classes.

     ``directory`` is passed unchanged to each model's ``import_*`` function.
     The loaders run in the fixed order listed below (presumably because
     later entities refer to earlier ones -- TODO confirm). ``cls`` itself
     is not used.
     """
     loaders = (
         Agency.import_agencies,
         Calendar.import_calendars,
         Stop.import_stops,
         Path.import_paths,
         Route.import_routes,
         Trip.import_trips,
         Frequency.import_frequencies,
     )
     for load in loaders:
         load(directory)
Beispiel #3
0
 def export_gtfs(cls, directory):
     """Export every entity type by delegating to the model classes.

     ``directory`` is passed unchanged to each model's ``write_*`` function.
     The writers run in the fixed order listed below; ``cls`` itself is not
     used.
     """
     writers = (
         Agency.write_agencies,
         Calendar.write_calendars,
         Stop.write_stops,
         Route.write_routes,
         Trip.write_trips,
         Frequency.write_frequencies,
         Path.write_paths,
     )
     for write in writers:
         write(directory)
Beispiel #4
0
 def close(self):
     """Mark this object as closed and clear every model class.

     Resets the open flag and the stored database name, then calls
     ``clear()`` on each model class in a fixed order.
     """
     self._is_open = False
     self._dbname = None

     # clear everything
     models = (
         Agency,
         Calendar,
         Stop,
         Path,
         Route,
         TripRoute,
         Trip,
         Frequency,
         Picture,
     )
     for model in models:
         model.clear()
 def __init__(self, N):
     """Create a fresh ``Frequency`` helper and remember ``N``.

     ``N`` is stored unchanged as ``noOfDocs``.
     """
     self.frequency = Frequency()
     self.noOfDocs = N
class VectorSpaceModel():
    """Rank documents against a query with tf-idf vectors and cosine similarity.

    Term statistics come from a ``Frequency`` helper. Documents are addressed
    by 1-based ids ``1..noOfDocs`` and reported as ``"<id>.txt"``.
    """

    def __init__(self, N):
        """Create the model for ``N`` documents with a fresh ``Frequency`` helper."""
        self.frequency = Frequency()
        self.noOfDocs = N

    def userInterface(self):
        """Show the text menu repeatedly until the user enters ``'0'``."""
        op = '1'
        while op != '0':
            print("vector Space Model")
            print("-----------------------------")
            print("1. Execute Query")
            print("0. Exit")

            op = input("Enter input: ")

            self.inputQuery(op)

    def inputQuery(self, op):
        """Handle one menu selection.

        For ``'1'`` a query is read, the word -> idf table is rebuilt, and
        the ranked documents are printed. Any other value does nothing.
        """
        if op != '1':
            return

        query = input("Enter query: ")
        queryArr = query.split(" ")
        self.data = self.createTable()

        qVector = self.getVector(queryArr)
        docVectors = self.getDocumentVectors()

        rankings = self.generateRankings(docVectors, qVector)
        print(self.formatRankings(rankings))

    def formatRankings(self, rankings):
        """Keep rows whose ``sim`` exceeds 0.005, sorted by ``sim`` descending."""
        relevant = rankings.loc[rankings['sim'] > 0.005]
        return relevant.sort_values(by=['sim'], ascending=False)

    def generateRankings(self, docs, q):
        """Build a DataFrame with one row per document and its similarity to ``q``.

        ``docs`` maps the 1-based document id to that document's vector.
        """
        doc_ids = range(1, self.noOfDocs + 1)
        return pd.DataFrame({
            'docs': [str(doc_id) + '.txt' for doc_id in doc_ids],
            'sim': [self.sim(docs[doc_id], q) for doc_id in doc_ids]
        })

    def sim(self, d, q):
        """Return the cosine similarity of vectors ``d`` and ``q``.

        When either vector has zero length (all zeros, or empty) the cosine
        is undefined; ``0.0`` is returned instead of dividing by zero, so a
        query that matches no known word simply ranks nothing.
        """
        x = np.asarray(d, dtype=float)
        y = np.asarray(q, dtype=float)

        norm_product = np.sqrt(np.sum(x * x)) * np.sqrt(np.sum(y * y))
        if norm_product == 0:
            return 0.0

        return float(np.sum(x * y) / norm_product)

    def createTable(self):
        """Load the documents, build the dictionary and return ``{word: idf}``."""
        self.frequency.loadDocuments()
        self.frequency.buildDictionary()

        keys = self.frequency.getWords()
        values = self.frequency.getIdf(self.noOfDocs)

        return dict(zip(keys, values))

    def getVector(self, array, docId=0):
        """Return the tf-idf vector of ``array`` over the words in ``self.data``.

        The vector has one component per entry of ``self.data``, in that
        mapping's iteration order; words not contained in ``array`` get 0.

        ``docId == 0`` means ``array`` is a query and term frequencies are
        counted from ``array`` itself; otherwise they are looked up in the
        ``Frequency`` helper for document ``docId``.
        """
        is_query = docId == 0
        # Count the query terms once instead of once per matching word.
        query_tf = self.getQueryFrequency(array) if is_query else None

        vector = []
        for word, idf in self.data.items():
            if word not in array:
                vector.append(0)
                continue

            if is_query:
                tf = query_tf[word]
            else:
                tf = self.frequency.getTermFrequency(word)[docId]
            vector.append(self.tf_idf(tf, idf))
        return vector

    def getDocumentVectors(self):
        """Return ``{docId: vector}`` for document ids ``1..noOfDocs``."""
        docVectors = {}
        for docId in range(1, self.noOfDocs + 1):
            # collection is 0-based while document ids are 1-based.
            doc = self.frequency.collection[docId - 1]
            docVectors[docId] = self.getVector(doc, docId)
        return docVectors

    def getQueryFrequency(self, queryArr):
        """Return a dict mapping each item of ``queryArr`` to its occurrence count."""
        tf = {}
        for q in queryArr:
            tf[q] = tf.get(q, 0) + 1
        return tf

    def tf_idf(self, tf, idf):
        """Return the tf-idf weight, i.e. ``tf * idf``."""
        return tf * idf
    def decode():
        """Interactively break an alphabetic shift cipher by frequency analysis.

        Steps:

        1. Load the reference letter-frequency table and measure the letter
           frequencies (as percentages, case-insensitive) of the encoded file.
        2. Propose candidate shift keys by aligning the most frequent
           reference letter with the text's letters, most frequent first.
           Each candidate gets a confidence value: the summed percentage of
           text letters that, once shifted back, land on the reference letter
           of the same frequency rank.
        3. Let the user pick a candidate or ask for another one.
        4. Write the text shifted back by the chosen key to the decoded file;
           non-letters are copied unchanged and letter case is preserved.

        Letters that are not keys of the reference table are ignored by the
        frequency count. The code runs unchanged on Python 2 and Python 3.
        """
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3

        standard_frequency = Frequency.get(Util.FILE_NAME)

        text_frequency = standard_frequency.fromkeys(standard_frequency.keys(), float(0))

        with open(Util.ENCODED_FILE, "r") as encoded_file:
            encoded_text = encoded_file.read()

        numchar = 0
        for c in encoded_text:
            o = ord(c)

            # Fold upper case onto lower case. Both bounds are inclusive so
            # that 'Z' and 'z' are counted as well.
            if 65 <= o <= 90:
                o = o + 32

            if 97 <= o <= 122:
                letter = chr(o)
                if letter in text_frequency:
                    text_frequency[letter] = text_frequency[letter] + 1
                    numchar = numchar + 1

        # With no letters at all every frequency stays 0.0; skipping the
        # normalisation avoids a division by zero.
        if numchar:
            for f in text_frequency:
                text_frequency[f] = 100 * text_frequency[f] / numchar

        by_value = operator.itemgetter(1)
        sorted_standard_frequency = sorted(standard_frequency.items(), key=by_value, reverse=True)
        sorted_text_frequency = sorted(text_frequency.items(), key=by_value, reverse=True)

        # Every candidate key aligns a text letter with the most frequent
        # reference letter.
        standard_ascii = ord(sorted_standard_frequency[0][0])

        keys = []
        list_index = 0
        index_key = 0
        while index_key == 0:

            text_character = sorted_text_frequency[list_index][0]
            list_index = list_index + 1

            key_value = ord(text_character) - standard_ascii
            if key_value < 0:
                key_value = key_value + 26

            if any(key == key_value for key, _ in keys):
                # Already proposed: move on to the next most frequent letter.
                continue

            key_confidence = 0
            for rank, (text_character, text_value) in enumerate(sorted_text_frequency):
                decodedKey = ord(text_character) - key_value
                if decodedKey < 97:
                    decodedKey = decodedKey + 26

                if sorted_standard_frequency[rank][0] == chr(decodedKey):
                    key_confidence = key_confidence + text_value

            keys.append((key_value, key_confidence))

            for position, (value, confidence) in enumerate(keys, 1):
                print("%s . Encryption key:  %s , confidence value:  %s ."
                      % (position, value, confidence))

            while True:
                input_key = read_line("Select a key or type 0 to calculate another key -> ")
                try:
                    index_key = int(input_key)
                except ValueError:
                    print("Key index incorrect, please insert a number!")
                    continue

                if 1 <= index_key <= len(keys):
                    key_value = keys[index_key - 1][0]
                    break

                if index_key != 0:
                    print("Key index incorrect!")
                elif list_index < len(sorted_standard_frequency):
                    print("Calculating another key...")
                    break
                else:
                    print("All keys have been already generated!")

        decoded_characters = []
        for encoded_character in encoded_text:
            code = ord(encoded_character)

            if 65 <= code <= 90:
                alphabet_start = 65
            elif 97 <= code <= 122:
                alphabet_start = 97
            else:
                alphabet_start = None

            if alphabet_start is not None:
                code = code - key_value
                # Wrap around inside the letter's own case.
                if code < alphabet_start:
                    code = code + 26

            decoded_characters.append(chr(code))

        with open(Util.DECODED_FILE, "w") as decrypted:
            decrypted.write("".join(decoded_characters))
          
Beispiel #8
0
                # !mwd - no need to set previous blocks
                #  since setting the next block automatically
                #  sets previously blocks
                #try:
                #    previous_block = Trip.get(int(previous_trip))
                #    trip.previous_block = previous_block
                #except Exception, e: pass

                try:
                    next_block = Trip.get(int(next_trip))
                    trip.next_block = next_block
                except Exception, e: pass

            for frequency_node in tree.getroot().findall('Frequency'):
                frequency_id = frequency_node.get('id', Frequency.new_id())
                gtfs_id = frequency_node.get('gtfs_id', None)
                trip_route_id = frequency_node.findtext('trip_route_id')
                start = frequency_node.findtext('start')
                end = frequency_node.findtext('end')
                headway = frequency_node.findtext('headway')

                trip_route = TripRoute.get(int(trip_route_id))
                if trip_route is None:
                    print "Missing trip route id ", trip_route_id
                    print "for frequency id ", frequency_id
                    continue
                    

                frequency = trip_route.add_frequency(start, end, headway)
                frequency.frequency_id = int(frequency_id)
Beispiel #9
0
# -*- coding:utf-8 -*-

__author__ = 'Kusamura'

from Data import Data
from InvertedIndex import InvertedIndex
from Search import Search
from Frequency import Frequency

if __name__ == '__main__':
    # Build the chapter list from chaps/chap_title.tsv: take the first
    # tab-separated column of every line and drop its first two characters.
    # The file is closed as soon as it has been read, and only the line
    # terminator is stripped so a final line without a newline keeps its
    # last character.
    with open('chaps/chap_title.tsv', 'r') as titles:
        fileList = [line.rstrip('\n').split('\t')[0][2:] for line in titles]  # list of chapters

    # List of Data objects (data read from the files)
    dataList = [Data(fileName) for fileName in fileList]

    index = InvertedIndex(dataList)  # build the inverted index

    #	module = Search(index) # search module
    #	keys = ['retrieval', 'half-a-trillion', 'thus', 'layer', 'test', 'hoge', 'hogehoge']
    #	for key in keys:
    #		module.do(key)

    #	print index.countKeys() # count the words in the dictionary

    #	print Frequency().docFrequency(index) # document frequency

    print(Frequency().termFrequency(dataList))  # term frequency