Esempio n. 1
0
def readDocx(path: str, result: str, *keys):
    """Scan a Word document for keywords and write every hit to ``result``.

    The output file is opened in ``'w+'`` mode.  Table cells are examined
    first, then body paragraphs.  For each examined text and each key
    produced by ``flatten(keys)`` that occurs in it, the string
    ``sep.join([path, key, text])`` is written.  ``sep``, ``flatten`` and
    ``docx`` are module-level names defined outside this function.

    :param path: path of the document handed to ``docx.Document``.
    :param result: path of the output file.
    :param keys: keywords to look for (passed through ``flatten``).
    """
    with open(result, 'w+') as out:

        def write_hits(text):
            # One write per key found in ``text``, in key order.
            for key in flatten(keys):
                if key in text:
                    out.write(sep.join([path, key, text]))

        print('読み込: %s' % path)
        document = docx.Document(path)

        # Tables: every cell, with line breaks removed from its text.
        all_cells = (
            cell
            for table in document.tables
            for row in table.rows
            for cell in row.cells
        )
        for cell in all_cells:
            write_hits(cell.text.replace('\n', '').replace('\r', ''))

        # Paragraphs: each one is searched together with the text of the
        # paragraph before it, after removing spaces.
        previous = ''
        for paragraph in document.paragraphs:
            combined = previous + paragraph.text
            write_hits(combined.replace(' ', '').replace(' ', ''))
            previous = paragraph.text
Esempio n. 2
0
def get_LDA_tags(model, dictionary, text):
    """Return the flattened tags of every topic assigned to ``text``.

    The text is tokenized with ``process_blog_post``, converted to a
    bag-of-words via ``dictionary.doc2bow`` and passed to
    ``model.get_document_topics`` with ``MIN_PROB_THRESHOLD`` as the minimum
    probability.  ``get_topic_tags`` is called once per returned topic and
    the per-topic results are flattened into a single list.

    NOTE(review): the method names suggest gensim's LDA model and
    dictionary objects - confirm against the caller.
    """
    tokens = process_blog_post(text)
    bow = dictionary.doc2bow(tokens)
    doc_topics = model.get_document_topics(
        bow, minimum_probability=MIN_PROB_THRESHOLD)

    tags_per_topic = []
    for topic_idx, _probability in doc_topics:
        # Only the topic index is needed; the probability is ignored.
        tags_per_topic.append(get_topic_tags(model, dictionary, topic_idx))
    return list(flatten(tags_per_topic))
Esempio n. 3
0
File: word.py Progetto: skdtd/Script
 def read(path: str, *param, sep: str = ',') -> list:
     """Search a Word document for keywords and return one record per hit.

     Table cells are scanned first (tables, rows and cells in document
     order), then the body paragraphs.  Each item is searched together with
     the text carried over from the preceding item, after removing spaces.
     For every key produced by ``flatten(param)`` that occurs in that search
     text, the record ``sep.join([path, key, item_text])`` is appended, where
     ``item_text`` is the item's own text without ``\\n`` and ``\\r``.

     Carry-over rule: while the keys are checked for an item, a hit clears
     the carried text and a miss sets it to the item's raw text, so the
     result of the last key decides what is carried to the next item.  Items
     whose search text is empty are skipped and leave the carried text
     untouched.  The carried text is reset between tables and paragraphs.

     :param path: path of the document handed to ``docx.Document``.
     :param param: keywords to look for (passed through ``flatten``).
     :param sep: separator used to join the fields of a record.
     :return: list of record strings; empty when ``path`` is empty or None.
     """
     # Guard on the argument itself: comparing the builtin ``str`` type with
     # '' or None can never be true, so such a check would never fire.
     if not path:
         return []

     file = docx.Document(path)
     keys = list(flatten(param))  # materialized once, reused for every item
     res_list = []

     def scan(items):
         """Apply the keyword search to a sequence of objects with ``.text``."""
         tmp = ''
         for item in items:
             # NOTE(review): both replace() calls strip the same character as
             # written here; presumably one of them targeted another kind of
             # whitespace originally - TODO confirm.
             text = (tmp + item.text).replace(' ', '').replace(' ', '')
             if not text:
                 continue
             for key in keys:
                 if key in text:
                     res_list.append(
                         sep.join([
                             path, key,
                             item.text.replace('\n', '').replace('\r', '')
                         ]))
                     tmp = ''
                 else:
                     tmp = item.text

     # The carried-over text is prepended exactly once for table cells, the
     # same way as for paragraphs.
     scan(cell for t in file.tables for r in t.rows for cell in r.cells)
     scan(file.paragraphs)
     return res_list
Esempio n. 4
0
    def parse_page(self, fpath, data, page_num):
        """Extract indicators from one page of text and report each of them.

        First pass: every regex in ``self.patterns`` (indicator type ->
        pattern) is run over ``data``.  When a match is a tuple of groups,
        only its first group is used.  Whitelisted matches are skipped,
        matches whose type is in ``self.defang`` have ``[.]`` replaced by
        ``.``, and when ``self.dedup`` is set, (type, value) pairs already in
        ``self.dedup_store`` are skipped while new ones are recorded there.

        Second pass (only when ``self.custom_indicators`` is truthy): for
        each indicator type, all names of its id -> names mapping are joined
        into one case-insensitive, word-bounded alternation and searched in
        ``data``.  Every finding that equals (ignoring case) one of an id's
        names reports that id.

        :return: list of the values returned by ``self.handler.print_match``.
        """
        results = []

        for ind_type, ind_regex in self.patterns.items():
            for raw_match in ind_regex.findall(data):
                # findall yields tuples for multi-group patterns.
                value = raw_match[0] if isinstance(raw_match, tuple) else raw_match

                if self.is_whitelisted(value, ind_type):
                    continue

                if ind_type in self.defang:
                    value = re.sub(r"\[\.\]", ".", value)

                if self.dedup:
                    seen_key = (ind_type, value)
                    if seen_key in self.dedup_store:
                        continue
                    self.dedup_store.add(seen_key)

                results.append(
                    self.handler.print_match(fpath, page_num, ind_type, value)
                )

        if not self.custom_indicators:
            return results

        for indicator_type, indicator_dict in self.custom_indicators.items():
            unique_names = set(flatten(indicator_dict.values()))
            pattern = "|".join("\\b{}\\b".format(name) for name in unique_names)
            findings = re.findall(pattern, data, re.IGNORECASE)

            # Nothing found, or grouped (tuple) results: nothing to report.
            if not findings or type(findings[0]) == tuple:
                continue

            for stix_id, names in indicator_dict.items():
                lower_names = {name.lower() for name in names}
                for finding in findings:
                    try:
                        if finding.lower() in lower_names:
                            results.append(
                                self.handler.print_match(
                                    fpath, page_num, indicator_type, stix_id
                                )
                            )
                    except Exception as e:
                        self.handler.print_error(findings, e)

        return results
Esempio n. 5
0
def remove_trace(nodes):
    """Delete from ``dot.body`` every line mentioning ``nodes``, transitively.

    Each line of the module-level ``dot.body`` is stripped of its ``[...]``
    part and surrounding whitespace, then split on ``" -> "`` into the node
    names it mentions.  Lines mentioning any of ``nodes`` are removed from
    ``dot.body``; the other names found on those lines become the ``nodes``
    of the next recursive call.  Recursion stops when no line matches.

    Intermediate state is printed to stdout at several points.

    :param nodes: iterable of node names to remove.
    :return: always ``None``.
    """
    print(type(nodes))
    stripped = [
        re.sub('\\[.*\\]', '', line).strip().replace('\t', '').replace('\n', '')
        for line in dot.body
    ]
    o = [line.split(" -> ") if " -> " in line else [line] for line in stripped]
    print(o)
    print(nodes)
    temp = [(i, j) for i, j in enumerate(o) for n in nodes if n in j]
    print(temp)
    if not temp:
        return
    index, values = list(zip(*temp))
    values = set(flatten(values))
    # A line mentioning several of the requested nodes appears in ``temp``
    # once per node; pop each index only once, highest first, so the
    # remaining positions stay valid.
    for i in sorted(set(index), reverse=True):
        dot.body.pop(i)
    print(nodes, values)
    # A requested node may have matched no line at all, so drop the
    # requested nodes without requiring each of them to be present.
    values.difference_update(nodes)
    return remove_trace(list(values))
Esempio n. 6
0
def matchers(text, list_of_regex):
    """Return the set of all matches of the given patterns in ``text``.

    Every pattern in ``list_of_regex`` is applied with ``re.findall`` and
    the per-pattern result lists are merged, one level deep, into a single
    set.  Patterns with several capture groups therefore contribute tuples,
    as returned by ``re.findall``.

    :param text: string to search.
    :param list_of_regex: iterable of patterns accepted by ``re.findall``.
    :return: set of matches; empty when nothing matches or no pattern is given.
    """
    # The standard library provides the one-level flatten directly, so no
    # third-party helper is needed for it.
    from itertools import chain

    return set(
        chain.from_iterable(re.findall(regex, text) for regex in list_of_regex))
Esempio n. 7
0
def flat10(lst):
    """Flatten ``lst`` by delegating to ``nltk.flatten``.

    The result of ``nltk.flatten`` is returned as-is.
    """
    import nltk

    flattened = nltk.flatten(lst)
    return flattened
Esempio n. 8
0
def flat8(lst):
    """Flatten ``lst`` with ``matplotlib.cbook.flatten`` and return a list."""
    import matplotlib.cbook as cbook

    return [*cbook.flatten(lst)]
Esempio n. 9
0
def flat7(lst):
    """Flatten ``lst`` with ``pandas.core.common.flatten`` and return a list."""
    import pandas.core.common as pandas_common

    return [*pandas_common.flatten(lst)]
Esempio n. 10
0
def flat6(lst):
    """Flatten ``lst`` by delegating to ``setuptools.namespaces.flatten``.

    The helper's result is returned as-is, without being turned into a list.
    """
    import setuptools.namespaces as namespaces

    return namespaces.flatten(lst)
Esempio n. 11
0
							m4a_tag(audio_dirname, audio_filename, artist, album, track, tracks, title, year, genre, bpms, compilation)
					except:
						ERROR("Error: Failed to write tags to " + audio_filename + ".")
						raise fragile.Break
			else:
				ERROR("Error: Names for tags in file " + audio_filename + " could not be detected.")
				raise fragile.Break
	except:
		ERROR("Error: File " + audio_filename + " could not be decoded.")



# -------------------- MAIN --------------------
if __name__ == "__main__":
	# Input paths come from args.files, flattened into one list.
	files = list(flatten(args.files))

	# Walk each input path recursively and keep the supported audio files,
	# sorted per input path.
	audio_files = []
	for i in files:
		audio_files += sorted(
			p for p in Path(i).glob("**/*")
			if p.suffix in {".mp3", ".flac", ".m4a"}
		)
	# The workers receive plain string paths.
	audio_files = list(map(str, audio_files))

	if __DEBUG__:
		print(f'Number of cores: {__CPU__}')

	# Hand the files to a pool of __CPU__ worker processes, one file per
	# task, and wait until all of them have finished.
	pool = multiprocessing.Pool(processes=__CPU__)
	pool.map(func=process_audio, iterable=audio_files, chunksize=1)
	pool.close()
	pool.join()