Beispiel #1
0
    def post(self):
        """Extract keywords for every text in the ``salto_texts_list`` argument.

        Each text is processed in its own temporary folder below
        ``/opt/keywords-extractor/keywords/``. The folder is removed again
        once the text has been handled, also when the extraction raises.

        Returns:
            dict: ``{'kw': [...]}`` holding one list of keywords per submitted
            text, in submission order. The list is empty when no
            ``salto_texts_list`` value was parsed.
        """
        # NOTE(review): ``parser`` lives outside this method; registering the
        # argument on every call presumably belongs next to the parser's
        # definition -- confirm against the module before moving it.
        parser.add_argument('salto_texts_list', action='append')
        args = parser.parse_args()
        # Guard against an absent value (presumably None when the argument
        # was not sent -- TODO confirm) so the loop below never iterates None.
        st_texts_list = args['salto_texts_list'] or []
        kw_list = []

        # The log file is written next to this script: "<script name>.log".
        # os.path.split() of an absolute path already yields an absolute
        # folder, so no further abspath() call is needed.
        script_folder, script_name = os.path.split(os.path.abspath(__file__))
        logFile = os.path.join(script_folder, script_name + ".log")
        logging.basicConfig(filename=logFile, level=logging.WARNING)

        for text_number, st in enumerate(st_texts_list, 1):
            outputDirectory = os.path.join("/opt/keywords-extractor/keywords/",
                                           "temp_folder_" + str(text_number))

            make_output_directory(outputDirectory)
            try:
                st_u8 = st.encode('utf8')
                key_word_extractor = KeywordExtractor(st_u8, outputDirectory)
                key_words_set = key_word_extractor.extract_keywords()

                kw_list.append(list(key_words_set))
            finally:
                # Remove the directory containing the output files, even if
                # the extraction failed, so no temp folder is left behind.
                shutil.rmtree(outputDirectory)

        return {'kw': kw_list}
Beispiel #2
0
 def test_it_extract_keywords_10(self):
     # The extractor built for "10152.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10152.txt", self.output_folder)
     expected_keywords = {
         'incidente', 'contadino', 'investito da un trattore', 'Kuppelwies'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #3
0
 def test_it_extract_keywords_8(self):
     # The extractor built for "10046.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10046.txt", self.output_folder)
     expected_keywords = {
         'Alessandro Vicentini', 'Bolzano',
         'È una questione strutturale', 'La crisi morde', 'auto'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #4
0
 def test_de_extract_keywords_30(self):
     # The extractor built for "21870.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "21870.txt", self.output_folder)
     expected_keywords = {
         'Greta Marcolongo', 'Live-Musik', 'Andrea Maffei',
         'Fußball-Übertragungen'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #5
0
 def test_it_extract_keywords_13(self):
     # The extractor built for "10187.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10187.txt", self.output_folder)
     expected_keywords = {
         'Cisl Alto Adige', 'imposta sul valore aggiunto',
         "aumento dell'Iva", 'euro', 'Freiheitlichen'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #6
0
 def test_de_extract_keywords_48(self):
     # The extractor built for "22008.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "22008.txt", self.output_folder)
     expected_keywords = {
         'Politikerrenten', 'Landeshauptmann Arno Kompatscher',
         'Freiheitlichen', 'Südtiroler Frühling'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #7
0
 def test_de_extract_keywords_32(self):
     # The extractor built for "21905.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "21905.txt", self.output_folder)
     expected_keywords = {
         'Sexarbeiterin', 'Day', 'Fotostrecke', 'Problem', 'Xenia',
         'internationale Hurentag'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #8
0
 def test_de_extract_keywords_44(self):
     # The extractor built for "21986.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "21986.txt", self.output_folder)
     expected_keywords = {
         'René Benkos', 'Busbahnhofsareals', 'Willi Hüsler',
         'Erlebnishaus Südtirol', 'Kaufhausprojekt', 'Boris Podrecca'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #9
0
 def test_de_extract_keywords_35(self):
     # The extractor built for "21921.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "21921.txt", self.output_folder)
     expected_keywords = {
         'Karl Zeller', 'SVP-Senator', 'Abänderungsantrag',
         'RAI-Sitze der sprachlichen Minderheiten'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #10
0
 def test_de_extract_keywords_43(self):
     # The extractor built for "21984.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "21984.txt", self.output_folder)
     expected_keywords = {
         'Andreas Pöder', 'Regionalratsabgeordnete',
         'Movimento 5 Stelle', 'Paul Köllensperger'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #11
0
 def test_de_extract_keywords_40(self):
     # The extractor built for "21965.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "21965.txt", self.output_folder)
     expected_keywords = {
         'Freiheitlichen', 'Stocker', 'Landtagswahlen', 'Wählerstimme',
         'Pius', 'Ulli', 'SVP'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #12
0
 def test_de_extract_keywords_53(self):
     # The extractor built for "22056.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "22056.txt", self.output_folder)
     expected_keywords = {
         'Frau', 'schwer', 'Kuh', 'Franz', 'Jungbäuerin', 'Mölten',
         'Sattlerhüttenwirt'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #13
0
 def test_it_extract_keywords_11(self):
     # The extractor built for "10157.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10157.txt", self.output_folder)
     expected_keywords = {
         'politico', 'Andreas Perugini', 'MoVimento 5 Stelle',
         'presentato', 'programma', 'estromissione'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #14
0
 def test_it_extract_keywords_4(self):
     # The extractor built for "1028.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "1028.txt", self.output_folder)
     expected_keywords = {
         'dialogo', 'Richard Theiner', 'Svp', 'autonomia integrale',
         'sorriso degli italiani'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #15
0
 def test_it_extract_keywords_24(self):
     # The extractor built for "10293.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10293.txt", self.output_folder)
     expected_keywords = {
         'Giunta comunale', 'Bolzano', 'Mercatino di Natale', 'turismo',
         'Alto Adige', 'gennaio', 'ambientalista'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #16
0
 def test_it_extract_keywords_22(self):
     # The extractor built for "10288.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10288.txt", self.output_folder)
     expected_keywords = {
         'Bizzo', 'tedesco', 'italiano', 'quotidiano',
         'Innovation Festival', 'Alto Adige'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #17
0
 def test_it_extract_keywords_20(self):
     # The extractor built for "10275.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10275.txt", self.output_folder)
     expected_keywords = {
         'Karl Zeller', 'Svp', 'Eva Klotz', "accordo", 'rifugio',
         'tedesco', 'provocazione', 'funzionario', 'mozione', 'difenda'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #18
0
 def test_de_extract_keywords_47(self):
     # The extractor built for "22003.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "22003.txt", self.output_folder)
     expected_keywords = {
         'Freiheitlichen', 'SVP und PD', 'Volksabstimmung',
         'Seilbahnprojekt', 'Brixen'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #19
0
 def test_de_extract_keywords_52(self):
     # The extractor built for "22051.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "22051.txt", self.output_folder)
     expected_keywords = {
         'Mädchen', 'Heterobaby', 'zwanzigminütigen Kurzfilm', 'Welt',
         'homosexuellen Menschen', 'Jungs'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #20
0
 def test_de_extract_keywords_41(self):
     # The extractor built for "21969.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "21969.txt", self.output_folder)
     expected_keywords = {
         'Bewerbungsdossiers', 'Giorgio Orsoni',
         'Kulturhauptstadtregion', 'Christian Tommasini',
         'Alberto Stenico', 'Luis Durnwalder', 'Venedig', 'Nordest'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #21
0
 def test_it_extract_keywords_16(self):
     # The extractor built for "10225.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10225.txt", self.output_folder)
     expected_keywords = {
         'pulizia linguistica', 'Florian Kronbichler', 'quota Svp',
         'polemica', 'risolve', 'teoria', 'soluzione',
         'rifugi di montagna', 'ribadisce'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #22
0
 def test_de_extract_keywords_50(self):
     # Membership-only check: each keyword listed below must be present in
     # the set extracted for "22024.txt"; extra keywords are tolerated.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "22024.txt", self.output_folder)
     found_keywords = extractor.extract_keywords()
     required_keywords = (
         'rechtextreme Bewegung',
         'rechtsradikale Bewegung',
         'DIGOS-Ermittlungen',
         'Luigi Spagnoli',
         'legge Scelba',
         'Socialismo Nazionale',
     )
     # Checked in the listed order, so the first missing keyword is reported.
     for keyword in required_keywords:
         self.assertIn(keyword, found_keywords)
Beispiel #23
0
 def test_it_extract_keywords_21(self):
     # The extractor built for "10286.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10286.txt", self.output_folder)
     expected_keywords = {
         'Arno Kompatscher', 'tre domande', 'entrature particolari',
         'il prossimo Landeshauptmann', 'democrazia diretta', 'firma',
         'legge provinciale', 'Svp'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #24
0
 def test_it_extract_keywords_28(self):
     # The extractor built for "10328.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10328.txt", self.output_folder)
     expected_keywords = {
         'studio Besier', 'comitato proALTvor', 'Tiziana Campagnoli',
         'Bressanone', 'Provincia', 'Comune', 'Stephan Besier',
         'tecnici', 'incontro', 'interrompe', 'ancora valido', 'sindaco'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #25
0
 def test_it_extract_keywords_15(self):
     # The extractor built for "10212.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10212.txt", self.output_folder)
     expected_keywords = {
         'Bolzano', 'quartiere', 'partecipante',
         'bolzanobici around the world', 'dedicata alle due ruote',
         'popolare', 'iniziativa'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #26
0
    def test_it_extract_keywords(self):
        # Membership-only check: each keyword listed below must be present in
        # the set extracted for "1008-it.txt"; extra keywords are tolerated.
        key_word_extractor = KeywordExtractor(
            os.path.join(self.script_folder, "test"), "1008-it.txt",
            self.output_folder)
        key_words_set = key_word_extractor.extract_keywords()

        required_keywords = (
            'pensione',
            'struttura',
            'don Chisciotte',
            'Franco Basaglia',
            'Lorenzo Toresini',
        )
        # assertIn (rather than assertTrue(x in s)) names the missing keyword
        # and shows the extracted set when the check fails.
        for keyword in required_keywords:
            self.assertIn(keyword, key_words_set)
Beispiel #27
0
 def test_it_extract_keywords_9(self):
     # The extractor built for "10053.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10053.txt", self.output_folder)
     expected_keywords = {
         'dossier', 'Bolzano', 'sito', 'Venezia e Nordest',
         'capitale europea della cultura', 'candidatura',
         'caldo involucro', 'raccomandazione'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #28
0
 def test_it_extract_keywords_23(self):
     # The extractor built for "10290.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "10290.txt", self.output_folder)
     expected_keywords = {
         'zona', 'acque', 'Luis Durnwalder', 'Bolzano Sud',
         'SEL e Azienda Elettrica', 'capitale dei rifiuti',
         'Ai Piani 5 anni di cattivi odori', 'unter einem Dach',
         'Das Kreuz mit den Maturabällen', 'Altri rifiuti verso Bolzano'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #29
0
 def test_de_extract_keywords_55(self):
     # The extractor built for "21717-de.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(
         source_dir, "21717-de.txt", self.output_folder)
     expected_keywords = {
         'DFB-Teams', 'Süddeutsche Zeitung', 'Trainingslager',
         'Rennfahrer', 'Pascal Wehrlein', 'verletzt', 'Oliver Bierhoff',
         'Heinrich Dorfer', 'Nico Rosberg', 'Ich hätte tot sein können',
         'um einen blöden Zufall', 'schweren Schock'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)
Beispiel #30
0
 def test_de_extract_keywords_31_2(self):
     # The extractor built for "21891.txt" (with the "test" folder under
     # self.script_folder) must return exactly the keyword set below.
     source_dir = os.path.join(self.script_folder, "test")
     extractor = KeywordExtractor(source_dir, "21891.txt", self.output_folder)
     expected_keywords = {
         'Vermisstenfall', 'Identifizierung', 'Etsch',
         'Zahnarztbefunde', 'Fluss', 'Verwesungsprozess',
         'Frauenleiche', 'Serafino Baldessari', 'Davide Baldessari',
         'Verona'
     }
     self.assertEqual(extractor.extract_keywords(), expected_keywords)