Example #1
    def _load_data(self, sample_set, sub_sampling):
        X = []
        y = []

        for _, sample in sample_set.iterrows():
            # Precomputed features are stored as .npy files next to the audio
            sample_path = sample['PATH'].replace('.mp3', '.npy')
            sample_data = np.load(path.join(self.data_path, sample_path))

            if sub_sampling:
                # Take several windows per track, all labelled with the track's tags
                sample_data = utils.get_windows(sample=sample_data,
                                                window=self.window,
                                                window_size=self.window_size,
                                                num_windows=self.num_windows)
                X.extend(sample_data)
                y.extend([sample['TAGS']] * self.num_windows)
            else:
                # Take a single window from the centre of the track
                center_sample = utils.get_windows(sample=sample_data,
                                                  window='center',
                                                  window_size=self.window_size,
                                                  num_windows=1)
                X.append(center_sample[0])
                y.append(sample['TAGS'])

        X = np.array(X)
        y = np.array(y)
        # Add a trailing channel dimension
        X = X.reshape(*X.shape, 1)
        return X, y
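The utils.get_windows helper this loader relies on is not part of the example. A minimal sketch of what it could look like, assuming sample is a feature array windowed along its last (time) axis and that window chooses between 'center' and random placement (names and behaviour here are assumptions, not the original code):

import numpy as np

def get_windows(sample, window, window_size, num_windows):
    # Hypothetical reconstruction: return `num_windows` slices of length
    # `window_size` taken along the last axis of `sample`.
    length = sample.shape[-1]
    if window == 'center':
        starts = [max((length - window_size) // 2, 0)] * num_windows
    else:
        # treat any other mode as random placement
        starts = np.random.randint(0, max(length - window_size, 1), size=num_windows)
    return [sample[..., start:start + window_size] for start in starts]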
Example #2
    def get_trip_distance(self, trip: Iterable[str]) -> int:
        """
        >>> Network().get_trip_distance([])
        0
        >>> Network.from_distances_text(
        ...     "London to Dublin = 464\\n"
        ...     "London to Belfast = 518\\n"
        ...     "Dublin to Belfast = 141\\n"
        ... ).get_trip_distance(['Dublin', 'London', 'Belfast'])
        982
        """
        return sum(self.distances[(lhs, rhs)]
                   for lhs, rhs in get_windows(trip, 2))
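In the Advent-of-Code style examples, get_windows yields consecutive fixed-size tuples from a sequence; the doctest above sums the distance of each neighbouring pair of stops. A minimal list-based sketch of such a sliding-window helper, inferred from the call sites rather than taken from the original module:

from typing import Iterable, Tuple, TypeVar

T = TypeVar('T')

def get_windows(items: Iterable[T], size: int) -> Iterable[Tuple[T, ...]]:
    # e.g. get_windows('abcd', 2) -> ('a', 'b'), ('b', 'c'), ('c', 'd')
    items = list(items)
    for start in range(len(items) - size + 1):
        yield tuple(items[start:start + size])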
Example #3
def online_identification(k, w, window_size, train, test):

    click.echo('\n--- Online flight degradation identification ---')

    train_csv = load_csvs(train, should_preprocess=False)
    test_csv = load_csvs(test, should_preprocess=False)

    click.echo('  - k     : %d ' % k)
    click.echo('  - w     : %d ' % w)
    click.echo('  - train : %s ' % train)
    click.echo('  - test  : %s ' % test)

    click.echo('\nRunning...\n')

    # The test flight defines the total duration to iterate over
    last_second = int(test_csv[0]['data']['seconds'].iloc[-1])

    for time_window in range(0, last_second, window_size):
        click.echo('Time window is: %d ' % time_window)

        train_window = get_windows(train_csv, time_window, time_window + window_size - 1)
        test_window = get_windows(test_csv, time_window, time_window + window_size - 1)

        train_data, train_label = window_to_lists(train_window, 'pose_position_z')
        test_data, test_label = window_to_lists(test_window, 'pose_position_z')

        # Classify the test window with k-NN over DTW distances
        model = KnnDtw(k_neighbours=k, max_warping_window=w)
        model.fit(np.array(train_data), np.array(train_label))

        predicted_label, probability = model.predict(test_data, parallel=True)
        click.echo('Predicted label : %s ' % str(predicted_label))

        click.echo('\n')

    click.echo('\nDone.')
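The get_windows used by these flight-analysis commands is a different helper: judging by the later compute_labels example, it slices each flight's dataframe to the rows whose 'seconds' value falls inside the requested time window and returns one dict per flight with that slice under 'window'. A sketch under that assumption (the real helper may carry extra keys or preprocessing):

def get_windows(csvs, start_second, end_second):
    # Hypothetical reconstruction of the time-window slicer
    windows = []
    for flight in csvs:
        data = flight['data']
        mask = (data['seconds'] >= start_second) & (data['seconds'] <= end_second)
        windows.append({'window': data[mask]})
    return windows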
Example #4
    def at_least_one_letter_twice_in_a_row(self) -> bool:
        """
        >>> String("xx").at_least_one_letter_twice_in_a_row()
        True
        >>> String("abcdde").at_least_one_letter_twice_in_a_row()
        True
        >>> String("aabbccdd").at_least_one_letter_twice_in_a_row()
        True
        >>> String("jchzalrnumimnmhp").at_least_one_letter_twice_in_a_row()
        False
        """
        for first, second in get_windows(self.string, 2):
            if first == second:
                return True

        return False
Example #5
    def get_arrangement_happiness(self, arrangement: Tuple[str, ...]) -> int:
        """
        >>> AttendeeSet({('a', 'b'): 2, ('b', 'a'): 2, ('b', 'c'): -1,
        ...     ('c', 'a'): 5})\\
        ...     .get_arrangement_happiness(('a', 'b'))
        2
        >>> AttendeeSet({('a', 'b'): 2, ('b', 'c'): -1, ('c', 'a'): 5})\\
        ...     .get_arrangement_happiness(('a', 'b', 'c'))
        6
        """
        if len(arrangement) < 2:
            return 0
        elif len(arrangement) == 2:
            lhs, rhs = arrangement
            return self.happiness_map[(lhs, rhs)]
        # Append the first attendee again so the table is treated as circular
        return sum(
            self.happiness_map[(lhs, rhs)]
            for lhs, rhs in get_windows(arrangement + arrangement[:1], 2)
        )
Example #6
    def get_minimum_step_count(
            self,
            start: int = 0,
            return_to_start: bool = False,
            debugger: Debugger = Debugger(enabled=False),
    ) -> int:
        """
        >>> Graph({
        ...     (0, 1): 2, (1, 0): 2,
        ...     (0, 4): 2, (4, 0): 2,
        ...     (1, 2): 6, (2, 1): 6,
        ...     (2, 3): 2, (3, 2): 2,
        ...     (3, 4): 8, (4, 3): 8,
        ... }).get_minimum_step_count()
        14
        """
        nodes = self.get_nodes()
        if start not in nodes:
            raise Exception(f"Start {start} is not in nodes {nodes}")
        other_nodes = set(nodes) - {start}
        prefix = (start, )
        if return_to_start:
            suffix = prefix
        else:
            suffix = ()
        visit_orders = (prefix + permutation + suffix
                        for permutation in itertools.permutations(other_nodes))
        min_distance = None
        trip_distances_cache = {}
        for visit_order in debugger.stepping(visit_orders):
            distance = sum(
                self.get_shortest_distance(
                    node_a,
                    node_b,
                    nodes,
                    trip_distances_cache=trip_distances_cache)
                for node_a, node_b in get_windows(visit_order, 2))
            if min_distance is None or distance < min_distance:
                min_distance = distance
            if debugger.should_report():
                debugger.default_report(f"min distance: {min_distance}")

        return min_distance
Example #7
    def get_trip_distance(
        self,
        trip: Iterable[int],
        trip_distances_cache: Optional[Dict[Tuple[int, ...], int]] = None,
    ) -> Optional[int]:
        """
        >>> Graph({}).get_trip_distance(())
        >>> Graph({}).get_trip_distance((1,))
        >>> Graph({}).get_trip_distance((1, 2))
        >>> Graph({(1, 3): 3, (3, 2): 4}).get_trip_distance((1, 2))
        >>> Graph({(2, 1): 3}).get_trip_distance((1, 2))
        >>> Graph({(1, 2): 3}).get_trip_distance((1, 2))
        3
        >>> Graph({(1, 2): 3, (2, 4): 4}).get_trip_distance((1, 2, 3))
        >>> Graph({(1, 2): 3, (3, 2): 4}).get_trip_distance((1, 2, 3))
        >>> Graph({}).get_trip_distance((1, 2, 3))
        >>> Graph({(1, 2): 3, (2, 3): 4}).get_trip_distance((1, 2, 3))
        7
        """
        if trip_distances_cache is None:
            trip_distances_cache = {}
        if trip in trip_distances_cache:
            return trip_distances_cache[trip]
        total_distance = 0
        at_least_2_nodes = False
        for pair in get_windows(trip, 2):
            pair: Tuple[int, int]
            distance = self.edges.get(pair)
            if distance is None:
                trip_distances_cache[trip] = None
                return None
            at_least_2_nodes = True
            total_distance += distance

        if not at_least_2_nodes:
            trip_distances_cache[trip] = None
            return None

        trip_distances_cache[trip] = total_distance
        return total_distance
Example #8
    def get_keys(self,
                 _count: Optional[int] = 64,
                 start: int = 0,
                 window_size: int = 1000,
                 debug: bool = False) -> Iterable[Tuple[int, int, str]]:
        # Each window holds a hash plus the `window_size` hashes that follow it
        windows = get_windows(self.get_all_hashes(start), window_size + 1)
        key_count = 0
        if debug:
            timer = Timer()
        for index, window in enumerate(windows):
            _hash, next_hashes = window[0], window[1:]
            if self.is_hash_a_key(_hash, next_hashes):
                key_count += 1
                yield key_count, index, _hash
                if _count is not None and key_count >= _count:
                    break

            if debug:
                if index % 1000 == 0:
                    print(f"Count: {index}, time: "
                          f"{timer.get_pretty_current_duration()}, key count: "
                          f"{key_count}/{_count}")
Example #9
def compute_labels(data, dataarray, w, window_size, coordinate, should_plot):
    click.echo('--- Compute labels ---')

    desired_csv = load_csvs(data, should_preprocess=False)
    data_csv = load_csvs(dataarray, should_preprocess=False)

    # Tag each flight with its index so labelled windows can be traced back
    for index, flight_data_number in enumerate(data_csv):
        flight_data_number['data']['flight_number'] = index

    # The desired (reference) flight defines the total duration to iterate over
    last_second = int(desired_csv[0]['data']['seconds'].iloc[-1])

    click.echo('  - dataarray   : %s ' % dataarray)
    click.echo('  - w           : %d ' % w)

    click.echo('\nRunning...')

    # Timestamp used in the plot file names
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H%M%S')

    flight_dict = {}

    for time_window in range(0, last_second, window_size):
        click.echo('Time window is: %d ' % time_window)

        desired_window = get_windows(desired_csv, time_window, time_window + window_size - 1)
        data_window = get_windows(data_csv, time_window, time_window + window_size - 1)

        # Compute the DTW distance between the desired flight and every
        # unlabelled flight for this time window
        desired_flight = desired_window[0]['window'][coordinate].tolist()

        dtws = []

        for index, flight_window in enumerate(data_window):
            pos_data = flight_window['window'][coordinate].tolist()

            dtw = dtw_distance(desired_flight, pos_data, w)
            # Attach the DTW value to the window and keep track of all of them
            flight_window['dtw'] = dtw
            dtws.append(dtw)

            if should_plot:
                # Plot the desired data against this flight for the current window
                plt.plot(desired_flight, label='Desired')
                plt.plot(pos_data, label='Unlabelled')
                plt.ylim(0, 2.1)
                plt.xlabel('Seconds')
                plt.ylabel('Meters')
                plt.title('Desired and unlabelled data. DTW = {}'.format(dtw))
                plt.legend()
                plt.grid(True)
                plt.savefig('plots/z/desired_each_flight/dtwValues_{}_{}_{}.jpeg'.format(st, time_window, index))
                plt.cla()

        if should_plot:
            # Replace the sentinel returned for incomparable windows (roughly
            # float(sys.maxsize)) with a constant that fits on the plot
            dtws = [550 if x == 9.223372036854776e+18 else x for x in dtws]

            # Plot the DTW values of all flights for this window
            plt.figure(figsize=(20, 10))
            plt.plot(dtws, color='#006600', alpha=.9)
            plt.ylim([0, 600])

            for flight_x, dtw in enumerate(dtws):
                plt.text(flight_x, dtw, str(dtw))

            plt.grid(True)
            plt.savefig('plots/z/dtw_values/dtwValues_{}_{}.pdf'.format(st, time_window))
            plt.cla()

            click.echo('Done. Plots have been saved.')


        # Loop over the windows again and assign labels from DTW thresholds
        for flight_window in data_window:
            if flight_window['dtw'] < 100:
                flight_window['label'] = 1
            elif flight_window['dtw'] < 299:
                flight_window['label'] = 2
            else:
                flight_window['label'] = 3

            # Accumulate the labelled windows per flight
            fn = flight_window['window']['flight_number'].iloc[0]
            flight_full_data = flight_window['window'].copy()
            flight_full_data['label'] = flight_window['label']
            flight_full_data['dtw'] = flight_window['dtw']

            if fn not in flight_dict:
                flight_dict[fn] = flight_full_data
            else:
                flight_dict[fn] = pd.concat([flight_dict[fn], flight_full_data])

    # Write one labelled CSV per flight
    for flight_index in flight_dict:
        flight_dataframe = flight_dict[flight_index]
        flight_dataframe.to_csv('data/labelled/z/f_{}.csv'.format(flight_index), sep=';', encoding='utf-8')
        print('f_{}.csv saved!'.format(flight_index))

    click.echo('\nDone.')
Example #10
def lda(path):

    MIN_COUNTS = 20
    MAX_COUNTS = 1800
    # words with count < MIN_COUNTS
    # and count > MAX_COUNTS
    # will be removed

    MIN_LENGTH = 15
    # minimum document length
    # (number of words)
    # after preprocessing

    # half the size of the context around a word
    HALF_WINDOW_SIZE = 5
    # it must be that 2*HALF_WINDOW_SIZE < MIN_LENGTH

    nlp = spacy.load('en')

    dataset = fetch_data_groups(data_home=path)
    docs = dataset['data']
    paths = dataset['filenames']

    # use only the file name, not the full path, to identify documents
    filenames = [file_path.split("/")[-1] for file_path in paths]
    docs = [(i, doc) for i, doc in enumerate(docs)]

    encoded_docs, decoder, word_counts = preprocess(docs, nlp, MIN_LENGTH,
                                                    MIN_COUNTS, MAX_COUNTS)

    # new ids will be created for the documents.
    # create a way of restoring initial ids:
    doc_decoder = {i: doc_id for i, (doc_id, doc) in enumerate(encoded_docs)}
    filename_decoder = {
        i: filenames[encoded_docs[i][0]]
        for i in range(len(encoded_docs))
    }

    data = []
    # new ids are created here
    for index, (_, doc) in enumerate(encoded_docs):
        windows = get_windows(doc, HALF_WINDOW_SIZE)
        # index represents id of a document,
        # windows is a list of (word, window around this word),
        # where word is in the document
        data += [[index, w[0]] + w[1] for w in windows]

    data = np.array(data, dtype='int64')

    word_counts = np.array(word_counts)
    unigram_distribution = word_counts / sum(word_counts)

    vocab_size = len(decoder)
    embedding_dim = 50

    # train a skip-gram word2vec model
    texts = [[str(j) for j in doc] for i, doc in encoded_docs]
    model = models.Word2Vec(texts,
                            size=embedding_dim,
                            window=5,
                            workers=4,
                            sg=1,
                            negative=15,
                            iter=70)
    model.init_sims(replace=True)

    word_vectors = np.zeros((vocab_size, embedding_dim)).astype('float32')
    for i in decoder:
        word_vectors[i] = model.wv[str(i)]

    texts = [[decoder[j] for j in doc] for i, doc in encoded_docs]
    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    n_topics = 2
    lda = models.LdaModel(corpus,
                          alpha=0.9,
                          id2word=dictionary,
                          num_topics=n_topics)
    corpus_lda = lda[corpus]

    doc_weights_init = np.zeros((len(corpus_lda), n_topics))
    for i in range(len(corpus_lda)):
        topics = corpus_lda[i]
        for j, prob in topics:
            doc_weights_init[i, j] = prob

    print(os.getcwd())
    np.save('newsgroups/utils/npy/data.npy', data)
    np.save('newsgroups/utils/npy/word_vectors.npy', word_vectors)
    np.save('newsgroups/utils/npy/unigram_distribution.npy',
            unigram_distribution)
    np.save('newsgroups/utils/npy/decoder.npy', decoder)
    np.save('newsgroups/utils/npy/doc_decoder.npy', doc_decoder)
    np.save('newsgroups/utils/npy/doc_weights_init.npy', doc_weights_init)
    np.save('newsgroups/utils/npy/filename_decoder.npy', filename_decoder)

    lst = []
    for i, topics in lda.show_topics(n_topics, formatted=False):
        lst.append('topic' + str(i) + ':' + ' '.join([t for t, _ in topics]))

    return lst
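As the inline comments note, this get_windows returns, for every word in a document, the word together with its surrounding context, and the data array is then built as rows of [doc_id, word, context...]. A minimal sketch consistent with those comments, assuming only positions with a full context on both sides are used (the original helper's boundary handling may differ):

def get_windows(doc, hws):
    # Hypothetical reconstruction: (centre word, 2*hws surrounding words)
    # for every position that has `hws` words on both sides.
    assert len(doc) > 2 * hws, 'document is too short for this window size'
    return [
        (doc[i], doc[i - hws:i] + doc[i + 1:i + hws + 1])
        for i in range(hws, len(doc) - hws)
    ]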
Example #11
class Password(str):
    FORBIDDEN_LETTERS = PASSWORD_FORBIDDEN_LETTERS
    # All straights of three consecutive letters (eg 'abc') that do not
    # contain a forbidden letter
    THREE_INCREASING_LETTERS_SEQUENCES = [
        "".join(letters) for letters in get_windows(string.ascii_lowercase, 3)
        if not set(letters) & set(PASSWORD_FORBIDDEN_LETTERS)
    ]
    # Maps each letter to its successor (skipping forbidden letters) and
    # whether incrementing it wraps around to 'a'
    NEXT_LETTER_AND_OVERFLOWED = {
        letter: (next_letter, next_letter == 'a')
        for letter, next_letter in (
            (letter, (next_letter if next_letter not in
                      PASSWORD_FORBIDDEN_LETTERS else alternate_next_letter))
            for letter, next_letter, alternate_next_letter in zip(
                tuple(string.ascii_lowercase),
                tuple(string.ascii_lowercase)[1:] + ('a', ),
                tuple(string.ascii_lowercase)[2:] + ('a', 'b'),
            ))
    }

    # Two doubled letters somewhere in the password, eg 'aa...bb'
    re_two_instances_of_two_repeated_letters = re.compile(r"(\w)\1.*(\w)\2")

    def get_next_password(
            self,
            debugger: Debugger = Debugger(enabled=False),
    ) -> 'Password':
        """
        >>> Password('abcdefgh').get_next_password()
        'abcdffaa'
        >>> Password('ghijklmn').get_next_password()
        'ghjaabcc'
        """
        debugger.reset()
        for password in debugger.stepping(self.get_next_password_candidates()):
            if password.is_valid():
                return password
            debugger.default_report_if(password)

    def get_next_password_candidates(self) -> Iterable['Password']:
        """
        >>> # noinspection PyUnresolvedReferences
        >>> [password for password, _
        ...  in zip(Password('xx').get_next_password_candidates(), range(6))]
        ['xy', 'xz', 'ya', 'yb', 'yc', 'yd']
        """
        current = self
        while True:
            current = current.get_next_password_candidate()
            yield current

    def get_next_password_candidate(self) -> 'Password':
        """
        >>> Password('xx').get_next_password_candidate()
        'xy'
        >>> Password('xy').get_next_password_candidate()
        'xz'
        >>> Password('xz').get_next_password_candidate()
        'ya'
        """
        position = -1
        result = list(self)
        while True:
            letter = result[position]
            next_letter, overflow = self.NEXT_LETTER_AND_OVERFLOWED[letter]
            result[position] = next_letter
            if not overflow:
                break
            position -= 1
            if -position > len(result):
                raise Exception(f"Password overflowed: {''.join(result)}")

        cls = type(self)
        return cls(''.join(result))

    def is_valid(self) -> bool:
        """
        >>> Password('abcdefgh').is_valid()
        False
        >>> Password('abcdffaa').is_valid()
        True
        >>> Password('ghijklmn').is_valid()
        False
        >>> Password('ghjaabcc').is_valid()
        True
        """
        return (self.has_three_increasing_letters()
                and self.does_not_include_forbidden_letters()
                and self.contains_two_instances_of_two_repeated_letters())

    def has_three_increasing_letters(self) -> bool:
        """
        >>> Password('hijklmmn').has_three_increasing_letters()
        False
        >>> Password('abbceffg').has_three_increasing_letters()
        False
        """
        return any(sequence in self
                   for sequence in self.THREE_INCREASING_LETTERS_SEQUENCES)

    def does_not_include_forbidden_letters(self) -> bool:
        """
        >>> Password('hijklmmn').does_not_include_forbidden_letters()
        False
        """
        return not any(letter in self for letter in self.FORBIDDEN_LETTERS)

    def contains_two_instances_of_two_repeated_letters(self) -> bool:
        """
        >>> Password('abbceffg')\\
        ...     .contains_two_instances_of_two_repeated_letters()
        True
        >>> Password('abbcegjk')\\
        ...     .contains_two_instances_of_two_repeated_letters()
        False
        """
        return bool(
            self.re_two_instances_of_two_repeated_letters.findall(self))