Example #1
 def test_bug_449000(self):
     # Test for sub() on escaped characters
     self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
                      'abc\ndef\n')
     self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), 'abc\ndef\n')
     self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), 'abc\ndef\n')
     self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), 'abc\ndef\n')
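A stand-alone restatement of what this test pins down: the regex engine itself interprets the \r and \n escapes inside a raw pattern string, so the raw and non-raw pattern spellings match the same two characters here.

import re

assert re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n') == 'abc\ndef\n'
assert re.sub('\r\n', '\n', 'abc\r\ndef\r\n') == 'abc\ndef\n'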
Example #2
    def get_dll_exports(self) -> str:
        file_type = self._get_filetype(self.file_data)
        if HAVE_PEFILE and file_type and ("PE32" in file_type
                                          or "MS-DOS executable" in file_type):
            try:
                pe = pefile.PE(self.file_path)
                if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
                    exports = []
                    for exported_symbol in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                        try:
                            if not exported_symbol.name:
                                continue
                            if isinstance(exported_symbol.name, bytes):
                                exports.append(
                                    re.sub(b"[^A-Za-z0-9_?@-]", b"",
                                           exported_symbol.name).decode())
                            else:
                                exports.append(
                                    re.sub("[^A-Za-z0-9_?@-]", "",
                                           exported_symbol.name))
                        except Exception as e:
                            log.error(e, exc_info=True)

                    return ",".join(exports)
            except Exception as e:
                log.error("PE type not recognised")
                log.error(e, exc_info=True)

        return ""
Example #3
def parsers(uscfiles, findreplace):
    parsedfiles = []
    for counter, section in enumerate(uscfiles):

        #print ""
        #print "File", counter
        parsedfile = subfile(section, findreplace)
        #print parsedfile
        # Replace Multiple Section references with links
        #   Include [^<] to make sure no group is transformed twice
        pattern = r'@@@\s[Ss]ections?\s([^<]*?)@@@@@(.*?)@@'
        pattern_replace = [r'(\d+\w*(?:\(\w+\))*[-|–]?\d*)([, @])',
                           r'<a href="/laws/target/%s/\1" class="sec">\1</a>\2']
        parsedfile = parsesections(pattern, pattern_replace, parsedfile)
        #parsedfile = re.sub(r'@of-ref@', r'ref-Title-'+title, parsedfile)
        parsedfile = re.sub(r'@@ref-.*?@', r'', parsedfile)
        #parsedfile = re.sub(r'ref-title-this', r'ref-title-'+title, parsedfile)



        # Encode Named Acts by removing lowercase and non-word characters, and appending the length of the name w/o non-word characters
        #pattern =  r'@@ref-namedact-(.*?)@@'
        pattern =  r'/ref-namedact-(.*?)/'
        parsedfile = parsenamedacts(pattern, parsedfile)

        parsedfile = re.sub(r'@of-ref@', r'ref-title-this', parsedfile)
        parsedfile = re.sub(r'@@ref-.*?@', r'', parsedfile)
        
        # Remove remaining @
        parsedfile = parsedfile.replace('@','')#.translate(None, '@')
        parsedfiles.append(parsedfile)
    return parsedfiles 
Example #4
    def build_options(self):
        """Generate analysis options.
        @return: options dict.
        """
        options = {}

        options["id"] = self.task.id
        options["ip"] = self.machine.resultserver_ip
        options["port"] = self.machine.resultserver_port
        options["category"] = self.task.category
        options["target"] = self.task.target
        options["package"] = self.task.package
        options["options"] = self.task.options
        options["enforce_timeout"] = self.task.enforce_timeout
        options["clock"] = self.task.clock
        options["terminate_processes"] = self.cfg.cuckoo.terminate_processes

        if not self.task.timeout or self.task.timeout == 0:
            options["timeout"] = self.cfg.timeouts.default
        else:
            options["timeout"] = self.task.timeout

        if self.task.category == "file":
            options["file_name"] = File(self.task.target).get_name()
            options["file_type"] = File(self.task.target).get_type()
            # If it's a PE file, collect export information to help choose
            # the right analysis package.
            options["exports"] = ""
            if HAVE_PEFILE and ("PE32" in options["file_type"] or
                                "MS-DOS executable" in options["file_type"]):
                try:
                    pe = pefile.PE(self.task.target)
                    if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
                        exports = []
                        for exported_symbol in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                            try:
                                if not exported_symbol.name:
                                    continue
                                if isinstance(exported_symbol.name, bytes):
                                    exports.append(
                                        re.sub(b"[^A-Za-z0-9_?@-]", b"",
                                               exported_symbol.name).decode(
                                                   "utf-8"))
                                else:
                                    exports.append(
                                        re.sub("[^A-Za-z0-9_?@-]", "",
                                               exported_symbol.name))
                            except Exception as e:
                                log.error(e, exc_info=True)

                        options["exports"] = ",".join(exports)
                except Exception as e:
                    log.error("PE type not recognised")
                    log.error(e, exc_info=True)

        # options from auxiliar.conf
        for plugin in self.aux_cfg.auxiliar_modules.keys():
            options[plugin] = self.aux_cfg.auxiliar_modules[plugin]

        return options
Example #5
def handle_techniques(line, **opts):

    vb_vars = opts["vb_vars"]
    enc_func_name = opts["enc_func_name"]
    decrypt_func = opts["decrypt_func"]

    def var_substitute(m):
        var = m.group(1)

    line = line.replace('"', '"""')
    line = re.sub(r'"""([A-F0-9]{2,})"""', decode_hex, line)
    line = re.sub(r'"""([\w_+=/]{2,})"""', decode_base64, line)
    line = re.sub(r'(?i)Chr[A-Z$]\(Asc[A-Z$](.+?)\)\)', r"\1", line)
    line = re.sub(r'(?i)Asc[A-Z$]\("""(\w)\w*"""\)', lambda m: str(ord(m.group(1))), line)
    line = re.sub(r'(?i)((?:Chr[A-Z$]?\(\d+\)\s*&?\s*)+)', decode_chr, line)
    line = re.sub(r'(?i)\b%s\s*\(\w+\("""(.+?)"""\),\s*\w+\("""(.+?)"""' % enc_func_name, decrypt_func, line)
    line = re.sub(r'(?i)\b%s\((?:""")?(.+?)(?:""")?,\s*(?:""")?(.+?)(?:""")?\)' % enc_func_name, decrypt_func, line)
    line = re.sub(r'(?i)StrReverse\(.+?"""(.+?)"""\)', decode_reverse, line)
    line = re.sub(r'""".+?"""\s+&+\s+""".+?""".+', concatenate, line)
    while "Chr(Asc(" in line:
        lastline = line
        line = re.sub(r'(?i)Chr\(Asc\((.+?)\)\)', r"\1",line)
        if line == lastline:
            break
    # Remove quotes before regexing against them.
    line = line.replace('""" + """','')
    line = line.replace('"""','')
    # Remove a few concat patterns. There's a bug with some obfuscation
    # techniques.
    line = line.replace(" + ", "")
    line = line.replace(" & ","")
    return line
Example #6
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if eventData in self.results:
            self.sf.debug("Skipping " + eventData + " as already mapped.")
            return None
        else:
            self.results[eventData] = True

        address = eventData

        # Skip post office boxes
        if address.lower().startswith('po box'):
            self.sf.debug("Skipping PO BOX address")
            return None

        rx1 = re.compile(r'^(c/o|care of|attn:|attention:)\s+[0-9a-z\s\.]',
                         flags=re.IGNORECASE)
        # Remove address prefixes for delivery instructions
        address = re.sub(rx1, r'', address)

        rx2 = re.compile(r'^(Level|Floor|Suite|Room)\s+[0-9a-z]+,',
                         flags=re.IGNORECASE)
        # Remove address prefixes known to return no results (floor, level, suite, etc).
        address = re.sub(rx2, r'', address)

        # Search for address
        data = self.query(eventData)

        # Usage Policy mandates no more than 1 request per second
        time.sleep(1)

        if data is None:
            self.sf.debug("Found no results for " + eventData)
            return None

        self.sf.info("Found " + str(len(data)) + " matches for " + eventData)

        for location in data:
            try:
                lat = location.get('lat')
                lon = location.get('lon')
            except BaseException as e:
                self.sf.debug("Failed to get lat/lon: " + str(e))
                continue

            if not lat or not lon:
                continue

            coords = str(lat) + "," + str(lon)
            self.sf.debug("Found coordinates: " + coords)

            evt = SpiderFootEvent("PHYSICAL_COORDINATES", coords,
                                  self.__name__, event)
            self.notifyListeners(evt)
Example #7
def handle_techniques(line, **opts):

    vb_vars = opts["vb_vars"]
    enc_func_name = opts["enc_func_name"]
    decrypt_func = opts["decrypt_func"]

    def var_substitute(m):
        var = m.group(1)

    line = line.replace('"', '"""')
    line = re.sub(r'"""([A-F0-9]{2,})"""', decode_hex, line)
    line = re.sub(r'"""([\w_+=/]{2,})"""', decode_base64, line)
    line = re.sub(r"(?i)Chr[A-Z$]\(Asc[A-Z$](.+?)\)\)", r"\1", line)
    line = re.sub(r'(?i)Asc[A-Z$]\("""(\w)\w*"""\)', lambda m: str(ord(m.group(1))), line)
    line = re.sub(r"(?i)((?:Chr[A-Z$]?\(\d+\)\s*&?\s*)+)", decode_chr, line)
    line = re.sub(r'(?i)\b%s\s*\(\w+\("""(.+?)"""\),\s*\w+\("""(.+?)"""' % enc_func_name, decrypt_func, line)
    line = re.sub(r'(?i)\b%s\((?:""")?(.+?)(?:""")?,\s*(?:""")?(.+?)(?:""")?\)' % enc_func_name, decrypt_func, line)
    line = re.sub(r'(?i)StrReverse\(.+?"""(.+?)"""\)', decode_reverse, line)
    line = re.sub(r'""".+?"""\s+&+\s+""".+?""".+', concatenate, line)
    while "Chr(Asc(" in line:
        lastline = line
        line = re.sub(r"(?i)Chr\(Asc\((.+?)\)\)", r"\1", line)
        if line == lastline:
            break
    # Remove quotes before regexing against them.
    line = line.replace('""" + """', "")
    line = line.replace('"""', "")
    # Remove a few concat patterns. There's a bug with some obfuscation
    # techniques.
    line = line.replace(" + ", "")
    line = line.replace(" & ", "")
    return line
Example #8
def clean(t):
    """
    normalize numbers, discard some punctuation that can be ambiguous
    """
    t = re.sub(r'[.,\d]*\d', '<NUM>', t)
    t = re.sub(r'[^a-zA-Z0-9,.;:<>\-\'\/?!$% ]', '', t)
    t = t.replace('--', ' ') # sometimes starts a sentence... trouble
    return t
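A minimal check of the number rule above (the sample input is made up): the class `[.,\d]*` followed by `\d` swallows digit runs together with grouping commas and decimal points.

import re

print(re.sub(r'[.,\d]*\d', '<NUM>', 'Revenue was 1,234.56 in 2020'))
# -> 'Revenue was <NUM> in <NUM>'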
Example #9
    def __open_text_cleaner(word):
        """
        Direct copy from the original BSG setup. The tokens matching logic was moved to bsg_tokenizer.py

        """
        word = re.sub(r'[^\w\'\-]|[\'\-\_]{2,}', "", word)
        if len(word) == 1:
            word = re.sub(r'[^\daiu]', '', word)
        return word
Example #10
    def run(self, results):
        """Run Moloch to import pcap
        @return: nothing 
        """
        self.key = "moloch"
        self.alerthash = {}
        self.fileshash = {}
        self.MOLOCH_CAPTURE_BIN = self.options.get("capture", None)
        self.MOLOCH_CAPTURE_CONF = self.options.get("captureconf", None)
        self.CUCKOO_INSTANCE_TAG = self.options.get("node", None)
        self.MOLOCH_USER = self.options.get("user", None)
        self.MOLOCH_PASSWORD = self.options.get("pass", None)
        self.MOLOCH_REALM = self.options.get("realm", None)
        self.MOLOCH_AUTH = self.options.get("auth", "digest")
        self.pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        self.MOLOCH_URL = self.options.get("base", None)
        self.task_id = results["info"]["id"]
        self.custom = None
        if "machine" in results["info"] and results["info"][
                "machine"] and "name" in results["info"]["machine"]:
            self.machine_name = re.sub(r"[\W]", "_",
                                       str(results["info"]["machine"]["name"]))
        else:
            self.machine_name = "Unknown"
        if results["info"].has_key(
                "options") and results["info"]["options"].has_key("setgw"):
            self.gateway = re.sub(r"[\W]", "_",
                                  str(results["info"]["options"]["setgw"]))
        else:
            self.gateway = "Default"

        if results["info"].has_key("options") and results["info"].has_key(
                "custom"):
            self.custom = re.sub(r"[\W]", "_", str(results["info"]["custom"]))

        if not os.path.exists(self.MOLOCH_CAPTURE_BIN):
            log.warning(
                "Unable to Run moloch-capture: BIN File %s Does Not Exist" %
                (self.MOLOCH_CAPTURE_BIN))
            return

        if not os.path.exists(self.MOLOCH_CAPTURE_CONF):
            log.warning(
                "Unable to Run moloch-capture Conf File %s Does Not Exist" %
                (self.MOLOCH_CAPTURE_CONF))
            return
        try:
            cmd = "%s -c %s -r %s -n %s -t %s:%s -t cuckoo_jtype:%s -t cuckoo_machine:%s -t cuckoo_gw:%s" % (
                self.MOLOCH_CAPTURE_BIN, self.MOLOCH_CAPTURE_CONF,
                self.pcap_path, self.CUCKOO_INSTANCE_TAG,
                self.CUCKOO_INSTANCE_TAG, self.task_id, self.task["category"],
                self.machine_name, self.gateway)
            if self.custom:
                cmd = cmd + " -t custom:%s" % (self.custom)
        except Exception as e:
            log.warning("Unable to Build Basic Moloch CMD: %s" % e)
Example #12
def termcenter():
	parser = argparse.ArgumentParser(description='Center stuff on terminals')
	parser.add_argument('string', nargs='*', type=str)
	args = parser.parse_args()

	for e in [sys.stdin] + args.string:
		lines = [e] if isinstance(e, str) else e.readlines()
		if lines:
			width = max(map(len, map(lambda s: re.sub(r'\x1B\[[0-9;]+m|\$.*\$', '', s), lines)))
			pad = int((os.get_terminal_size()[0]- width)/2)
			for line in lines:
				print(' '*pad + re.sub(r'\$.*\$|\n', '', line))
Example #13
        def process_tweet(tweet, stopwords):
            # convert to lowercase
            tweet = tweet.lower()
            # replace any links with "URL"
            tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', tweet)
            # replace "@username" references with "AT_USER"
            tweet = re.sub(r'@[\S]+', 'AT_USER', tweet)
            # replace hashtags:  #word -> word
            tweet = re.sub(r'#([\S]+)', r'\1', tweet)

            tweet = word_tokenize(tweet)
            return [word for word in tweet if word not in stopwords]
Example #14
def preprocess(sentence):
    """ A simple function to handle preprocessing of a sentence"""
    # Lowercase sentence
    sentence = sentence.lower()
    # Remove @ mentions
    sentence = re.sub('@[a-z0-9_]+', '', sentence)
    # Remove URLs
    sentence = re.sub(
        r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
        '', sentence)
    # Remove NewLines
    sentence = sentence.replace('\r\n', ' ').replace('\n',
                                                     ' ').replace('\r', ' ')
    return sentence
Example #15
def are_equals_urls(url1, url2):
    result = False
    url1 = urllib.parse.unquote(url1)
    url2 = urllib.parse.unquote(url2)
    url1 = re.sub(r"^(https?://(www\.)?)", "", url1)
    url1 = re.sub(' ', '', url1)
    url2 = re.sub(r"^(https?://(www\.)?)", "", url2)
    url2 = re.sub(' ', '', url2)
    if url1.endswith('/'):
        url1 = url1[0:-1]
    if url2.endswith('/'):
        url2 = url2[0:-1]
    if url1 == url2:
        result = True
    return result
Example #16
def get_features(frag, model):
    """
    ... w1. (sb?) w2 ...
    Features, listed roughly in order of importance:

    (1) w1: word that includes a period
    (2) w2: the next word, if it exists
    (3) w1length: number of alphabetic characters in w1
    (4) w2cap: true if w2 is capitalized
    (5) both: w1 and w2
    (6) w1abbr: log count of w1 in training without a final period
    (7) w2lower: log count of w2 in training as lowercased
    (8) w1w2upper: w1 and w2 is capitalized
    """
    words1 = clean(frag.tokenized).split()
    if not words1: w1 = ''
    else: w1 = words1[-1]
    if frag.next:
        words2 = clean(frag.next.tokenized).split()
        if not words2: w2 = ''
        else: w2 = words2[0]
    else:
        words2 = []
        w2 = ''

    c1 = re.sub(r'(^.+?\-)', '', w1)
    c2 = re.sub(r'(\-.+?)$', '', w2)

    feats = {}
    
    feats['w1'] = c1
    feats['w2'] = c2
    feats['both'] = c1 + '_' + c2

    len1 = min(10, len(re.sub(r'\W', '', c1)))
    
    if c1.replace('.','').isalpha():
        feats['w1length'] = str(len1)
        try: feats['w1abbr'] = str(int(math.log(1+model.non_abbrs[c1[:-1]])))
        except: feats['w1abbr'] = str(int(math.log(1)))

    if c2.replace('.','').isalpha():
        feats['w2cap'] = str(c2[0].isupper())
        try: feats['w2lower'] = str(int(math.log(1+model.lower_words[c2.lower()])))
        except: feats['w2lower'] = str(int(math.log(1)))        
        feats['w1w2upper'] = c1 + '_' + str(c2[0].isupper())

    return feats
Example #17
    def on_call(self, call, process):
        # Legacy, modern Dyre doesn't have hardcoded hashes in
        # CryptHashData anymore
        iocs = [
            "J7dnlDvybciDvu8d46D\\x00",
            "qwererthwebfsdvjaf+\\x00",
        ]
        pipe = [
            "\\??\\pipe\\3obdw5e5w4",
            "\\??\\pipe\\g2fabg5713",
        ]
        if call["api"] == "CryptHashData":
            buf = self.get_argument(call, "Buffer")
            if buf in iocs:
                self.cryptoapis = True
            tmp = re.sub(r"\\x[0-9A-Fa-f]{2}", "", buf)
            if self.compname in tmp:
                if re.match("^" + self.compname + "[0-9 ]+$", tmp):
                    self.cryptoapis = True
        elif call["api"] == "HttpOpenRequestA":
            buf = self.get_argument(call, "Path")
            if len(buf) > 10:
                self.networkapis.add(buf)
        elif call["api"] == "NtCreateNamedPipeFile":
            buf = self.get_argument(call, "PipeName")
            for npipe in pipe:
                if buf == npipe:
                    self.syncapis = True
                    break

        return None
Example #18
    def rewrite(self, response):
        """ Rewrites response according to matching rewrite rules.

        :return: rewritten response
        """
        headers_r = response.headers
        content_r = response.data
        for r in self.rules:
            if r.policy == 'rewrite-headers':
                #  todo: support rewrite-headers, use, e.g.,
                #   response.headers['X-Archive-Guessed-Content-Type']
                #  is this bytes or string?
                #  also, we're rewriting only "rewritable" mimetypes in wsgiapp.py
                self._log.warn(f'rulesengine policy rewrite-headers to be implemented')
                continue
            if r.policy == 'rewrite-all':
                try:
                    content_r = re.sub(r.rewrite_from, r.rewrite_to, content_r, options=re2_options)
                    self._log.info(f'rewriting response.data from... {r.rewrite_from[:80]}... to ...{r.rewrite_to[:80]}')
                except Exception as e:
                    self._log.warn(f'exception rewriting response.data from {r.rewrite_from[:80]}: {e}')
                    content_r = response.data
            elif r.policy == 'rewrite-js':
                self._log.warn(f'rulesengine policy rewrite-js to be implemented')
                # support this here?
                continue
            else:
                self._log.warn('unexpected policy; nothing rewritten!')
        return headers_r, content_r
Example #19
def _files(files):
    if not files:
        files = []
    hash = md5(str(datetime.now())).hexdigest()
    dest = '%s/%s/%s/%s' % (env.user.login[0], env.user.login, hash[:2],
                            hash[2:4])

    files_del = env.request.args('del-attach', [])
    if not isinstance(files_del, (list, tuple)):
        files_del = [files_del]
    for f in files_del:
        if f not in files:
            continue
        remove_attach(f)
        files.remove(f)

    files_in = env.request.args('attach', [])
    files_p = env.request.files('attach')

    if not isinstance(files_in, (list, tuple)):
        files_in = [files_in]
        files_p = [files_p]

    for i, file in enumerate(files_in[:10]):
        if isinstance(file, str):
            file = file.decode('utf-8')
        file = re.sub(r'[^\w\.]+', '-', unidecode(file))
        d = "%s/%s/" % (dest, randint(1000, 9999))
        make_attach(files_p[i], d, file, remove=True)
        files.append(os.path.join(d, file))

    return files
Example #20
    def run(self) -> List[str]:
        ret = []
        with open(self.filepath, "r") as f:
            source = f.read()

        # Get rid of superfluous comments.
        source = re.sub("/\\*.*?\\*/", "", source, flags=re.S)

        for script in re.findall(self.script_re, source, re.I | re.S):
            try:
                x = bs4.BeautifulSoup(script, "html.parser")
                language = x.script.attrs.get("language", "").lower()
            except Exception:
                language = None

            # We can't rely on bs4 or any other HTML/XML parser to provide us
            # with the raw content of the xml tag as they decode html entities
            # and all that, leaving us with a corrupted string.
            source = re.match("<.*>(.*)</.*>$", script, re.S).group(0)

            # Decode JScript.Encode encoding.
            if language in {"jscript.encode", "vbscript.encode"}:
                source = EncodedScriptFile(self.filepath).decode(source.encode())

            if len(source) > 65536:
                source = f"{source[:65536]}\r\n<truncated>"

            ret.append(source)

        return ret
Example #21
def spaceReplace(inputString, MODFLAG):
    # OLD: $var=    "EXAMPLE"
    # NEW: $var= "EXAMPLE"
    if MODFLAG == 0:
        MODFLAG = 0

    return re.sub(" +", " ", inputString), MODFLAG
Example #23
def get_db_name_from_url(url):
    domain = get_domain(url)
    result = re.sub(r'\.', '_', domain)

    # url = url.encode('utf-8', 'replace')
    # result = hashlib.md5(url).hexdigest()
    return result + '.db'
Example #24
 def __reWildcard(regexp, string):
     """Wildcard-based regular expression system"""
     regexp = re.sub("\*+", "*", regexp)
     match = True
     if regexp.count("*") == 0:
         if regexp == string:
             return True
         else:
             return False
     blocks = regexp.split("*")
     start = ""
     end = ""
     if not regexp.startswith("*"):
         start = blocks[0]
     if not regexp.endswith("*"):
         end = blocks[-1]
     if start != "":
         if string.startswith(start):
             blocks = blocks[1:]
         else:
             return False
     if end != "":
         if string.endswith(end):
             blocks = blocks[:-1]
         else:
             return False
     blocks = [block for block in blocks if block != ""]
     if not blocks:
         return match
     for block in blocks:
         i = string.find(block)
         if i == -1:
             return False
         string = string[i + len(block):]
     return match
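For comparison, the standard library's fnmatch implements similar shell-style wildcard semantics and can serve as a sanity check for this helper:

from fnmatch import fnmatch

# '*' matches any run of characters, mirroring __reWildcard's block logic.
assert fnmatch("abXXcdYY", "ab*cd*")
assert not fnmatch("abXXdc", "ab*cd")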
Example #26
def processText(text):
    '''
	strips some unwanted characters. Originally stripped the "references" section according to pubGeneric but it wasn't working. Splits full text strings by a simple sentence filter.
	'''
    text = re.sub(r'\x07|\r', '', text)
    #text = re.sub(r'\x07|\r|[(\s{0,3}\d{1,3}\s{0,3})(,\s{0,3}\d{1,3}\s{0,3}){0,7}\]', '', text)
    # strip ^G, \r, and inline citations
    #sections = pubGeneric.sectionRanges(text)
    #if sections is not None:
    #	try:
    #		dropRange = sections['ack']
    #		text = text[:dropRange[0]] + text[dropRange[1]:]
    #	except KeyError:
    #		pass
    #	try:
    #		dropRange = sections['refs']
    #		text = text[:dropRange[0]] + text[dropRange[1]:]
    #	except KeyError:
    #		pass

    # split by a period followed by a capital letter within the next 3 characters
    previousThreshold = -2
    threshold = 0
    for threshold in re.finditer(r'\..?.?([A-Z])', text):
        threshold = threshold.start()
        yield text[previousThreshold + 2:threshold + 1]
        previousThreshold = threshold
    yield text[threshold:]
Example #28
    def process_entry(self, entry):
        out = {'type': 'feed'}

        # UGnich - mooduck
        if entry['published'].endswith('UT'):
            entry['published'] = '%sC' % entry['published']

        tz = timezone(settings.timezone)
        try:
            out['created'] = \
               dateutil.parser.parse(entry['published']).astimezone(tz)
        except ValueError:
            entry['created'] = \
               dateutil.parser.parse(entry['published'])

        out['link'] = entry['link']
        out['title'] = re.sub(r'&#(?P<c>\d+);',
                              lambda c: unichr(int(c.group('c'))),
                              unescape(entry['title'])) \
                              if 'title' in entry else ''
        out['text'] = self.process_text(entry['summary'])
        out['tags'] = [ t['label'] or t['term'] for t in entry['tags'] ] \
                        if 'tags' in entry else []

        return out
Example #29
def replace_placeholders(string, item, match):
    """Replaces placeholders in the string."""
    if isinstance(item, praw.objects.Comment):
        string = string.replace('{{body}}', item.body)
    else:
        string = string.replace('{{body}}', item.selftext)
    string = string.replace('{{domain}}', getattr(item, 'domain', ''))
    string = string.replace('{{permalink}}', get_permalink(item))
    string = string.replace('{{subreddit}}', item.subreddit.display_name)
    if isinstance(item, praw.objects.Comment):
        string = string.replace('{{title}}', item.link_title)
    else:
        string = string.replace('{{title}}', item.title)
    string = string.replace('{{url}}', getattr(item, 'url', ''))
    if item.author:
        string = string.replace('{{user}}', item.author.name)
    else:
        string = string.replace('{{user}}', '[deleted]')

    # replace any {{match-##}} with the corresponding match groups
    string = re.sub(r'\{\{match-(\d+)\}\}', r'\\\1', string)
    if match:
        string = match.expand(string)

    return string
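The last two steps deserve a stand-alone illustration: the re.sub turns each {{match-N}} token into a backreference \N, and Match.expand then substitutes the capture groups (the pattern and sample strings here are made up):

import re

m = re.search(r'(\w+)@(\w+)', 'user@example')
template = re.sub(r'\{\{match-(\d+)\}\}', r'\\\1', 'hello {{match-1}} at {{match-2}}')
print(m.expand(template))  # -> 'hello user at example'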
Example #30
def clean_url(url, remove_arguments=True, domain=None, scheme=None):
    result = urllib.parse.unquote(url)
    # if '#' in result:
    # 	i = result.find('#')
    # 	result = result[:i]
    if domain or remove_arguments:
        if '?' in result:
            i = result.find('?')
            if domain:
                result1 = result[:i]
                result2 = result[i + 1:]
                if domain in result1 and remove_arguments:
                    result = result1
                elif domain in result2:
                    res_split = result2.split('=')
                    for r in res_split:
                        if domain in r:
                            result = r
                            if '&' in result:
                                i = result.find('&')
                                result = result[:i]
                            break
            else:
                result = result[:i]
    if scheme:
        if not re.match('https?://', result):
            result = scheme + '://' + result
    result = re.sub(' ', '', result)
    if result.endswith('/'):
        result = result[:-1]
    return result
Example #31
    def add_html_links(mentions, text):
        linked_text = ""
        mentions.sort(key=lambda x: x.start)
        dummy_char = "$"
        char_list = list(text)
        rabbi_dict = {}
        for m in mentions:
            if m.id_matches is None:
                continue
            rabbi_dict[m.start] = (text[m.start:m.end], m.id_matches)
            char_list[m.start:m.end] = list(dummy_char * (m.end - m.start))
        dummy_text = "".join(char_list)

        # assert len(dummy_text) == len(text), f"DUMMY {dummy_text}\nREAL {text}"

        def repl(match):
            try:
                mention, slugs = rabbi_dict[match.start()]
            except KeyError:
                print("KEYERROR", match.group())
                return match.group()
            # TODO find better way to determine if slug is in topics collection
            slug = slugs[0]
            other_slugs = slugs[1:]
            link = f"""<a href="https://www.sefaria.org/topics/{slug}" class="{"missing" if ':' in slug else "found"}">{mention}</a>"""
            if len(other_slugs) > 0:
                link += f'''<sup>{", ".join([f"""<a href="https://www.sefaria.org/topics/{temp_slug}" class="{"missing" if ':' in temp_slug else "found"}">[{i+1}]</a>""" for i, temp_slug in enumerate(other_slugs)])}</sup>'''
            return link

        linked_text = re.sub(r"\$+", repl, dummy_text)
        return linked_text
Example #32
def render_pony(name, text, balloonstyle, width=80, center=False, centertext=False):
	pony = load_pony(name)
	balloon = link_l = link_r = ''
	if text:
		[link_l, link_r] = balloonstyle[-2:]
	for i,line in enumerate(pony):
		match = re.search(r'\$balloon([0-9]*)\$', line)
		if match:
			minwidth = int(match.group(1) or '0')
			pony[i:i+1] = render_balloon(text, balloonstyle, minwidth=minwidth, maxwidth=int(width/2), pad=str.center if centertext else str.ljust)
			break
	try:
		first = pony.index('$$$')
		second = pony[first+1:].index('$$$')
		pony[first:] = pony[first+1+second+1:]
	except:
		pass
	pony = [ line.replace('$\\$', link_l).replace('$/$', link_r) for line in pony ]
	indent = ''
	if center:
		ponywidth = max([ len(re.sub(r'\x1B\[[0-9;]+m|\$.*\$', '', line)) for line in pony ])
		indent = ' '*int((width-ponywidth)/2)
	wre = re.compile(r'((\x1B\[[0-9;]+m)*.){0,%s}' % width)
	reset = '\n'
	return indent+(reset+indent).join([ wre.search(line).group() for line in pony ])+reset
Example #34
    def _find_and_replace(self, date_string, captures):
        """
        :warning: when multiple tz matches exist the last sorted capture will trump
        :param date_string:
        :return: date_string, tz_string
        """
        # add timezones to replace
        # import pdb; pdb.set_trace()
        cloned_replacements = copy.copy(self.REPLACEMENTS)  # don't mutate
        if captures.get('timezones') is not None:
            for tz_string in captures.get('timezones', []):
                cloned_replacements.update({tz_string: ' '})

        date_string = date_string.lower()
        for key, replacement in cloned_replacements.items():
            # we really want to match all permutations of the key surrounded by whitespace chars except one
            # for example: consider the key = 'to'
            # 1. match 'to '
            # 2. match ' to'
            # 3. match ' to '
            # but never match r'(\s|)to(\s|)' which would make 'october' > 'ocber'
            date_string = re.sub(r'(?i)(^|\s)' + key + r'(\s|$)', replacement,
                                 date_string)

        poptzstring = ''
        if captures.get('timezones') is not None:
            poptzstring = self._pop_tz_string(
                sorted(captures.get('timezones', [])))
        return date_string, poptzstring
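A minimal check of the boundary trick described in the comment above: replacing the key 'to' must not corrupt 'october'.

import re

print(re.sub(r'(?i)(^|\s)' + 'to' + r'(\s|$)', ' ', 'october to november'))
# -> 'october november'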
Example #35
def html2md(s):
    h2t = HTML2Text()
    h2t.body_width = 0
    #h2t.ignore_links = True
    #h2t.ignore_images = True
    s = h2t.handle(s)
    s = re.sub(r'\!?\[\]\((?P<url>.+?)\)', lambda m: " %s " % m.group('url'), s)
    return s
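A quick demonstration of the link-flattening sub above, independent of HTML2Text (the sample string is made up): empty-text markdown links and images collapse to their bare URLs.

import re

s = 'see ![](http://img.example/a.png) and [](http://page.example/)'
print(re.sub(r'\!?\[\]\((?P<url>.+?)\)', lambda m: " %s " % m.group('url'), s))
# -> 'see  http://img.example/a.png  and  http://page.example/ '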
Example #36
 def get_rabbi_regex(cls, rabbi):
     reg = rabbi.replace(
         cls.b_token,
         f"(?:{u'|'.join(re.escape(b) for b in cls.b_replacements)})")
     for starter in cls.starting_replacements:
         starter = re.escape(starter)
         reg = re.sub(f'^{starter}', f"(?:{starter.lower()}|{starter})",
                      reg)
     return reg
Example #37
def extract_domain_name_from_db(file_):
    domain_name = file_[file_.rfind('/') + 1:]
    domain_name = domain_name.replace('_', '.')
    domain_name = re.sub(r'^(www\d?\.)', '', domain_name)
    domain_name = domain_name.replace('.db', '')
    # domain_name = domain_name.replace('.it', '')
    # domain_name = domain_name.replace('.com', '')
    # domain_name = domain_name.replace('.org', '')
    return domain_name
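A stand-alone sketch of the same transformation (the file name is hypothetical):

import re

name = 'www_example_com.db'.replace('_', '.')   # 'www.example.com.db'
name = re.sub(r'^(www\d?\.)', '', name)         # 'example.com.db'
print(name.replace('.db', ''))                  # -> 'example.com'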
Example #38
def get_principal_domain(url):
    # Extracts the "principal" domain from a URL, e.g. www.xxxxx.xx.it
    if not isinstance(url, str):
        url = str(url)
        url = urllib.parse.unquote(url)
    result = urlparse(url).hostname
    if result:
        result = re.sub(r'^(www\d?\.)', '', result)
    return result
Example #39
def parsesections(pattern, pattern_replace, section):
    sectionsref = re.search(pattern, section)
    while sectionsref:
        i1 = sectionsref.start(1)
        i2 = sectionsref.end(2)
        #print "found multiple secs at", i1, "-", i2
        section = (section[:i1] +
                   re.sub(pattern_replace[0],
                          pattern_replace[1] % sectionsref.group(2),
                          section[i1:i2]) +
                   section[1+i2:])
        sectionsref = re.search(pattern, section)
    return section 
Example #40
 def replace(self, m):
     return "%s%s%s%s%s%s" % (
         m.group("scheme"),
         m.group("pass"),
         m.group("authority"),
         m.group("undef"),
         m.group("query"),
         re.sub(r":", "%3a", m.group("fragment")),
     )
Example #41
def ulogin():
    if env.user.id:
        raise AlreadyAuthorized

    sess = Session()

    if env.request.method == "POST":
        url = "http://ulogin.ru/token.php?token=%s&host=%s" % (env.request.args("token"), settings.domain)
        try:
            resp = urllib2.urlopen(url)
            data = dict.fromkeys(ULOGIN_FIELDS)
            data.update(json.loads(resp.read()))
            resp.close()
        except urllib2.URLError:
            return render("/auth/login.html", fields=ULOGIN_FIELDS, errors=["ulogin-fail"])

        try:
            env.user.authenticate_ulogin(data["network"], data["uid"])
            if env.user.id:
                return Response(redirect=referer())
        except NotAuthorized:
            pass

        login = data["nickname"].strip(u" -+.")
        if login:
            login = re.sub(r"[\._\-\+]+", "-", login)

        info = {
            "login": login,
            "network": data["network"],
            "uid": data["uid"],
            "name": ("%s %s" % (data["first_name"], data["last_name"])).strip(),
            "email": data["email"],
            "avatar": data["photo_big"],
            "birthdate": data["bdate"],
            "gender": True if data["sex"] == "2" else False if data["sex"] == "1" else None,
            "location": "%s, %s" % (data["city"], data["country"])
            if data["city"] and data["country"]
            else data["city"] or data["country"],
            "_nickname": data["nickname"],
            "_name": ("%s %s" % (data["first_name"], data["last_name"])).strip(),
            "_profile": data["profile"],
        }

        sess["reg_info"] = info
        sess.save()

    else:
        info = sess["reg_info"]

        if not info or not "network" in info or not "uid" in info:
            return Response(redirect="%s://%s/register" % (env.request.protocol, settings.domain))

    info["birthdate"] = parse_date(info["birthdate"]) or datetime.now() - timedelta(days=365 * 16 + 4)

    return render("/auth/register_ulogin.html", info=info)
Example #42
async def async_get_oauth2_token(session: aiohttp.ClientSession, username: str, password: str) -> Dict:
    """Hackily get an oauth2 token until I can be bothered to do this correctly"""
    params = {
        'client_id': OAUTH2_CLIENT_ID,
        'response_type': 'code',
        'access_type': 'offline',
        'redirect_uri': OAUTH2_REDIRECT_URI,
    }

    async with session.get(f'{LOGIN_URL}/oauth2/auth', params=params) as resp:
        if 400 <= resp.status < 500:
            raise GeAuthError(await resp.text())
        if resp.status >= 500:
            raise GeServerError(await resp.text())
        resp_text = await resp.text()

    email_regex = (
        r'^\s*(\w+(?:(?:-\w+)|(?:\.\w+)|(?:\+\w+))*\@'
        r'[A-Za-z0-9]+(?:(?:\.|-)[A-Za-z0-9]+)*\.[A-Za-z0-9][A-Za-z0-9]+)\s*$'
    )
    clean_username = re.sub(email_regex, r'\1', username)

    etr = etree.HTML(resp_text)
    post_data = {
        i.attrib['name']: i.attrib['value']
        for i in etr.xpath("//form[@id = 'frmsignin']//input")
        if 'value' in i.keys()
    }
    post_data['username'] = clean_username
    post_data['password'] = password

    async with session.post(f'{LOGIN_URL}/oauth2/g_authenticate', data=post_data, allow_redirects=False) as resp:
        if 400 <= resp.status < 500:
            raise GeAuthError(await resp.text())
        if resp.status >= 500:
            raise GeServerError(await resp.text())
        code = parse_qs(urlparse(resp.headers['Location']).query)['code'][0]

    post_data = {
        'code': code,
        'client_id': OAUTH2_CLIENT_ID,
        'client_secret': OAUTH2_CLIENT_SECRET,
        'redirect_uri': OAUTH2_REDIRECT_URI,
        'grant_type': 'authorization_code',
    }
    auth = aiohttp.BasicAuth(OAUTH2_CLIENT_ID, OAUTH2_CLIENT_SECRET)
    async with session.post(f'{LOGIN_URL}/oauth2/token', data=post_data, auth=auth) as resp:
        if 400 <= resp.status < 500:
            raise GeAuthError(await resp.text())
        if resp.status >= 500:
            raise GeServerError(await resp.text())
        oauth_token = await resp.json()
    try:
        return {'Authorization': 'Bearer ' + oauth_token['access_token']}
    except KeyError:
        raise GeAuthError(f'Failed to get a token: {oauth_token}')
Example #43
 def normalize_text(cls, lang, s):
     # text = re.sub('<[^>]+>', ' ', text)
     if lang == 'en':
         s = cls.myunidecode(s)
         s = re.sub(cls.normalizing_reg, cls.normalizing_rep, s)
         # text = unidecode(text)
         # text = re.sub('\([^)]+\)', ' ', text)
         # text = re.sub('\[[^\]]+\]', ' ', text)
     # text = ' '.join(text.split())
     return s
Example #44
    def run(self, results):
        """Run Moloch to import pcap
        @return: nothing
        """
        self.key = "moloch"
        self.alerthash = {}
        self.fileshash = {}
        self.MOLOCH_CAPTURE_BIN = self.options.get("capture", None)
        self.MOLOCH_CAPTURE_CONF = self.options.get("captureconf", None)
        self.CUCKOO_INSTANCE_TAG = self.options.get("node", None)
        self.MOLOCH_USER = self.options.get("user", None)
        self.MOLOCH_PASSWORD = self.options.get("pass", None)
        self.MOLOCH_REALM = self.options.get("realm", None)
        self.MOLOCH_AUTH = self.options.get("auth", "digest")
        self.pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        self.MOLOCH_URL = self.options.get("base", None)
        self.task_id = results["info"]["id"]
        self.custom = None
        if results["info"].has_key("machine") and results["info"]["machine"].has_key("name"):
            self.machine_name = re.sub(r"[\W]","_",str(results["info"]["machine"]["name"]))
        else:
            self.machine_name = "Unknown"
        if results["info"].has_key("options") and results["info"]["options"].has_key("setgw"):
            self.gateway = re.sub(r"[\W]","_",str(results["info"]["options"]["setgw"]))
        else:
            self.gateway = "Default"

        if results["info"].has_key("options") and results["info"].has_key("custom"):
            self.custom = re.sub(r"[\W]","_",str(results["info"]["custom"]))

        if not os.path.exists(self.MOLOCH_CAPTURE_BIN):
            log.warning("Unable to Run moloch-capture: BIN File %s Does Not Exist" % (self.MOLOCH_CAPTURE_BIN))
            return
        
        if not os.path.exists(self.MOLOCH_CAPTURE_CONF):
            log.warning("Unable to Run moloch-capture Conf File %s Does Not Exist" % (self.MOLOCH_CAPTURE_CONF))
            return         
        try:
            cmd = "%s -c %s -r %s -n %s -t %s:%s -t cuckoo_jtype:%s -t cuckoo_machine:%s -t cuckoo_gw:%s" % (
                self.MOLOCH_CAPTURE_BIN, self.MOLOCH_CAPTURE_CONF, self.pcap_path,
                self.CUCKOO_INSTANCE_TAG, self.CUCKOO_INSTANCE_TAG, self.task_id,
                self.task["category"], self.machine_name, self.gateway)
            if self.custom:
                cmd = cmd + " -t custom:%s" % (self.custom)
        except Exception as e:
            log.warning("Unable to Build Basic Moloch CMD: %s" % e)
Example #45
 def preprocess(self,
                lower=False,
                remove_hash=False,
                remove_mentions=False,
                remove_url=False,
                remove_newline=False):
     if lower:
         self.text_pp = self.text_pp.lower()
     if remove_hash:
         self.text_pp = self.text_pp.replace('#', '')
     if remove_mentions:
         self.text_pp = re.sub('@[a-z0-9_]+', '', self.text_pp)
     if remove_url:
         self.text_pp = re.sub(
             r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
             '', self.text_pp)
     if remove_newline:
         self.text_pp = self.text_pp.replace('\r\n', ' ').replace(
             '\n', ' ').replace('\r', ' ')
     return self
Example #46
 def __init__(self, word_processor_type='default'):
     self.__allowed_types = ['none', 'default', 'open_text']
     # sanity checks for input
     assert word_processor_type in self.__allowed_types
     # assigning processing function
     if word_processor_type == 'none':
         self.__call__ = lambda x: x
     if word_processor_type == 'default':
         self.__call__ = lambda word: re.sub(r'[^\w_,.?@!$#\':\/\-()]|[,\'?@$#]{2,}', "", word)
     if word_processor_type == "open_text":
         self.__call__ = self.__open_text_cleaner
Example #47
def remove_evil_links(pdf_data):
    """ Removes all it-ebook's links and metadata from the passed PDF data. """
    pdf_data = pdf_data.encode("hex")
    # Remove each annotation element inside the PDF file (This removes the
    # "clickable" it-ebooks.info links)
    print('Removing evil links')
    new_data = re2.sub(pattern, "", pdf_data)
    # Remove the actual links (link elements which are assigned to the annotations)
    new_data = new_data.replace("www.it-ebooks.info".encode("hex"), "")
    print('Done')
    return new_data.decode("hex")
Example #48
    def build_options(self):
        """Generate analysis options.
        @return: options dict.
        """
        options = {}

        options["id"] = self.task.id
        options["ip"] = self.machine.resultserver_ip
        options["port"] = self.machine.resultserver_port
        options["category"] = self.task.category
        options["target"] = self.task.target
        options["package"] = self.task.package


        if self.task.package == "service":
           if "service-dll-of-interest" not in self.task.options:
              if self.task.options == "":
                 self.task.options = "service-dll-of-interest=c:\\windows\\system32\\nwsapagent.dll"
              else:
                 self.task.options += ",service-dll-of-interest=c:\\windows\\system32\\nwsapagent.dll"
        options["options"] = self.task.options

        options["enforce_timeout"] = self.task.enforce_timeout
        options["clock"] = self.task.clock
        options["terminate_processes"] = self.cfg.cuckoo.terminate_processes

        if not self.task.timeout or self.task.timeout == 0:
            options["timeout"] = self.cfg.timeouts.default
        else:
            options["timeout"] = self.task.timeout

        if self.task.category == "file":
            options["file_name"] = File(self.task.target).get_name()
            options["file_type"] = File(self.task.target).get_type()
            # If it's a PE file, collect export information to help choose
            # the right analysis package.
            options["exports"] = ""
            if HAVE_PEFILE and ("PE32" in options["file_type"] or "MS-DOS executable" in options["file_type"]):
                try:
                    pe = pefile.PE(self.task.target)
                    if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
                        exports = []
                        for exported_symbol in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                            exports.append(re.sub(r'[^A-Za-z0-9_?@-]', '', exported_symbol.name))
                        options["exports"] = ",".join(exports)
                except:
                    pass

        return options
Example #49
    def _generate_login(self):
        name = unidecode(self.get_info('name')).lower()
        if not name:
            name = re.sub(r'^\w+:/+', '', self._url.lower())

        name = re.sub('^\W+|\W+$', '', name)

        words = re.split(r'\W+', name)
        name = ''
        br = False
        for w in words[:]:
            if not name:
                _name = w
            else:
                _name = "%s-%s" % (name, w)
            if len(_name) <= 16:
                name = _name
            else:
                name = _name[:16]
                br = True
                break

        if br:
            try:
                ri = name.rindex('-')
            except ValueError:
                ri = 16
            if ri > 6:
                name = name[:ri]

        i = 0

        while True:
            login = '%s%s' % (name, i or '')
            try:
                User('login', login)
            except UserNotFound:
                return login
            i += 1
Example #50
    def on_call(self, call, process):
        if not self.check:
            return None

        if call["api"].startswith("RegQueryValueEx"):
            keyname = self.get_argument(call, "FullName")
            uninstall = "\\microsoft\\windows\\currentversion\\uninstall"
            if (keyname and uninstall in keyname.lower() and
                keyname.lower().endswith("displayname")):
                app = self.get_argument(call, "Data")
                if app:
                    # Ignore language/architecture name segments
                    buf = re.sub(r"\([^\)]+\)", "", app).strip()
                    self.programs.add(buf)
Example #51
def has_standalone_keywords(classified_event):
    solo_lines_regex = rules.GOOD_SOLO_LINE.hack_double_regex()[classified_event.boundaries]
    text = classified_event.search_text
    good_matches = set()
    for line in text.split('\n'):
        alpha_line = re.sub(r'\W+', '', line)
        if not alpha_line:
            continue
        remaining_line = solo_lines_regex.sub('', line)
        deleted_length = len(line) - len(remaining_line)
        if 0.5 < 1.0 * deleted_length / len(alpha_line):
            good_matches.add(solo_lines_regex.findall(line)[0]) # at most one keyword per line
    if len(good_matches) >= 2:
        return True, 'found good keywords on lines by themselves: %s' % set(good_matches)
    return False, 'no good keywords on lines by themselves'
Example #52
def parse_tags(tags):
    if tags:
        tags = tags.strip(" \r\n\t*")
        if isinstance(tags, str):
            tags = tags.decode("utf-8")
        # tags = re.findall(r'[^\s*]+', tags)
        tags = filter(
            None, [t.replace(u"\xa0", " ").strip()[:64] for t in uniqify(re.split(r"(?<!\\)[\*,]", tags)[:10])]
        )
        if not tags:
            tags = None

    else:
        tags = []
    return map(lambda t: re.sub(r"\\,", ",", t), tags)
Example #53
 def test_bug_1140(self):
     # re.sub(x, y, u'') should return u'', not '', and
     # re.sub(x, y, '') should return '', not u''.
     # Also:
     # re.sub(x, y, unicode(x)) should return unicode(y), and
     # re.sub(x, y, str(x)) should return
     #     str(y) if isinstance(y, str) else unicode(y).
     for x in 'x', u'x':
         for y in 'y', u'y':
             z = re.sub(x, y, u'')
             self.assertEqual(z, u'')
             self.assertEqual(type(z), unicode)
             #
             z = re.sub(x, y, '')
             self.assertEqual(z, '')
             self.assertEqual(type(z), str)
             #
             z = re.sub(x, y, unicode(x))
             self.assertEqual(z, y)
             self.assertEqual(type(z), unicode)
             #
             z = re.sub(x, y, str(x))
             self.assertEqual(z, y)
             self.assertEqual(type(z), type(y))
Example #54
    def beautify(self, s, opts=None):

        if opts is not None:
            self.opts = opts

        if self.opts.brace_style not in ['expand', 'collapse', 'end-expand']:
            raise(Exception('opts.brace_style must be "expand", "collapse" or "end-expand".'))

        self.blank_state()

        while s and s[0] in [' ', '\t']:
            self.preindent_string += s[0]
            s = s[1:]

        self.input = self.unpack(s, self.opts.eval_code)

        parser_pos = 0
        while True:
            token_text, token_type = self.get_next_token()
            #print (token_text, token_type, self.flags.mode)
            if token_type == 'TK_EOF':
                break

            handlers = {
                'TK_START_EXPR': self.handle_start_expr,
                'TK_END_EXPR': self.handle_end_expr,
                'TK_START_BLOCK': self.handle_start_block,
                'TK_END_BLOCK': self.handle_end_block,
                'TK_WORD': self.handle_word,
                'TK_SEMICOLON': self.handle_semicolon,
                'TK_STRING': self.handle_string,
                'TK_EQUALS': self.handle_equals,
                'TK_OPERATOR': self.handle_operator,
                'TK_BLOCK_COMMENT': self.handle_block_comment,
                'TK_INLINE_COMMENT': self.handle_inline_comment,
                'TK_COMMENT': self.handle_comment,
                'TK_UNKNOWN': self.handle_unknown,
            }

            handlers[token_type](token_text)

            self.last_last_text = self.last_text
            self.last_type = token_type
            self.last_text = token_text

        sweet_code = self.preindent_string + re.sub(r'[\n ]+$', '', ''.join(self.output))
        return sweet_code
Example #55
def extract_iocs(s):
    for desc, pattern in PATTERNS:
        m = pattern.findall(s)
        if m:
            # Hacked-up bugfix for multilayer Chr(Asc(Chr(Asc( which can
            # sometimes mess up our quoted string extraction / parsing.
            while "Chr(Asc(" in s:
                lastline = s
                s = re.sub(r'(?i)Chr\(Asc\((.+?)\)\)', r"\1", s)
                if s == lastline:
                    break
            # Return the line matched and not m because I prefer to have
            # context and not simply the IOC. This helps with the executable
            # file IOC, sometimes it's a save location!
            return desc, convert_to_printable(s)
    return None
Example #56
def search_posts(text, user=None, private=None, bookmarks=False,
                 offset=0, limit=20):
    text = re.sub(r'[\(\)\[\]\{\}!?\\/]+', ' ', text).strip()

    es = Elasticsearch()

    body = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {
                        'private': False
                    }
                },
                'query': {
                    'query_string': {
                        'fields': ['text', 'tags'],
                        'query': text,
                        #'analyze_wildcard': True
                    }
                }
            }
        },
        #'sort': [{'created': {'order': 'desc'}}],
        'highlight': {
            'fields': {
                'text': {
                    'pre_tags': ['**'], 'post_tags': ['**'],
                    'number_of_fragments': 2,
                    'fragment_size': 200,
                }
            }
        }
    }

    res = es.search(index='point-posts,point-comments',
                    from_=offset, size=limit+1, body=body)

    results = _plist(res)
    #results = res['hits']['hits']
    has_next = len(results) > limit
    total = res['hits']['total']
    from pprint import pprint
    pprint(res)

    return results[:limit], has_next, total
Example #57
    def handle_string(self, token_text):
        if self.last_type in ['TK_START_BLOCK', 'TK_END_BLOCK', 'TK_SEMICOLON']:
            self.append_newline()
        elif self.last_type == 'TK_WORD':
            self.append(' ')

        # Try to replace readable \x-encoded characters with their equivalent,
        # if it is possible (e.g. '\x41\x42\x43\x01' becomes 'ABC\x01').
        def unescape(match):
            block, code = match.group(0, 1)
            char = chr(int(code, 16))
            if block.count('\\') == 1 and char in string.printable:
                return char
            return block

        token_text = re.sub(r'\\{1,2}x([a-fA-F0-9]{2})', unescape, token_text)

        self.append(token_text)
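A stand-alone run of the unescape idea above, assuming a source fragment that contains literal \xNN sequences: printable escapes collapse to their characters while non-printable ones are kept.

import re
import string

def unescape(match):
    block, code = match.group(0, 1)
    char = chr(int(code, 16))
    if block.count('\\') == 1 and char in string.printable:
        return char
    return block

print(re.sub(r'\\{1,2}x([a-fA-F0-9]{2})', unescape, r"'\x41\x42\x43\x01'"))
# -> 'ABC\x01' (the non-printable \x01 stays escaped)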
Example #58
def filtercomments(source):
    """NOT USED: strips trailing comments and puts them at the top."""
    trailing_comments = []
    comment = True

    while comment:
        if re.search(r'^\s*\/\*', source):
            comment = source[0:source.index('*/') + 2]
        elif re.search(r'^\s*\/\/', source):
            comment = re.search(r'^\s*\/\/', source).group(0)
        else:
            comment = None

        if comment:
            source = re.sub(r'^\s+', '', source[len(comment):])
            trailing_comments.append(comment)

    return '\n'.join(trailing_comments) + source