import binascii
import string
from nostril import nonsense

def c3():
    # Brute-force single-character XOR keys over the hex-encoded ciphertext.
    x = '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'
    tmp = binascii.unhexlify(x)
    for i in string.printable:
        # unhexlify returns bytes, so each element of tmp is already an int
        y = ''.join([chr(a ^ ord(b)) for (a, b) in zip(tmp, i * len(tmp))])
        if all(c in string.printable for c in y) and not nonsense(y):
            print(i, y)
def lang_features(story_sentences: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    lang_list = []
    wtl = WhatTheLang()
    for sent_dict in story_sentences:
        text = sent_dict["text"]
        try:
            lang = wtl.predict_lang(text)
            if not isinstance(lang, str):
                lang = "UKN"
        except:
            lang = "UKN"
        try:
            if len(text) <= 10:
                is_nonsense = False
            else:
                is_nonsense = nonsense(text)
        except:
            is_nonsense = True
        is_eng = isAscii(text)
        lang_dict = dict(sentence_id=sent_dict["id"],
                         lang=lang,
                         nonsense=is_nonsense,
                         ascii_chars=is_eng)
        lang_list.append(lang_dict)
    return lang_list
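# Note: isAscii is not defined in the snippet above. A minimal sketch of what
# such a helper might look like (hypothetical, assuming it only tests that
# every character falls in the ASCII range):
def isAscii(text: str) -> bool:
    # True when the string contains only ASCII characters
    return all(ord(ch) < 128 for ch in text)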
def c3(x):
    tmp = binascii.unhexlify(x)
    res = []
    for i in string.printable:
        y = ''.join([chr(a ^ ord(b)) for (a, b) in zip(tmp, i * len(tmp))])
        if all(c in string.printable for c in y) and not nonsense(y):
            res.append((i, y.strip()))
    return res
def is_meaningful(sentence):
    # Uses an external library to determine whether a sentence is nonsense/gibberish
    if len(sentence.replace(" ", "")) < 6:
        return False
    try:
        if nonsense(sentence):
            return False
    except:
        return False
    return True
def _test():
    print(offensiveness("F**k you"))
    print(offensiveness("Little bitch"))
    print(offensiveness("You can find a coffee machine on the second floor"))
    print(offensiveness("You're so dumb you can't even find a stupid coffee machine"))
    print(offensiveness("Can I fire that bitch, Charles?"))
    print(offensiveness("Where can I find a coffee machine?"))
    print(offensiveness("How can I file a complaint?"))
    # Models aren't perfect; this model does not cover all dialects
    print(offensiveness("Ray is a c**t"))

    # The nonsense function returns True if a string has no meaning
    print(nonsense("This should return false."))
    print(nonsense("ZkIOMJSIOJEKLMZKJMELLKS"))
    # Even when concatenating words or using more complicated codes,
    # the model can usually recognize meaningful strings.
    print(nonsense("ioFlXFndrInfo"))  # according to the documentation the accuracy is 99%
    # Text has to be long enough, otherwise a ValueError is raised
    print(nonsense("t2shrt"))
def ratings(text):
    table = []
    for word in text.split():
        if not word:
            continue
        if ignore(word):
            row = [word, '--', '--']
        else:
            row = [
                word,
                'y' if in_dictionary(word) else 'n',
                'n' if nonsense(word) else 'y'
            ]
        table.append(row)
    return table
def is_nonsense(sentence):
    """
    Checks whether a sentence is nonsense.

    Args:
        sentence: The string to be checked.

    Returns:
        True if the string is classified as nonsense; False if it is
        meaningful or too short for the classifier to evaluate.
    """
    try:
        return nonsense(sentence)
    except ValueError:
        return False
def main():
    res = {}
    cdump_path = "/media/tong/Elements/obfs/class-dump"
    file_gen = "res.json"
    cnt = 0
    for dirpath, dirnames, ifilenames in os.walk(cdump_path):
        for ifilename in ifilenames:
            file_path = os.path.abspath(os.path.join(dirpath, ifilename))
            # test from file
            try:
                with open(file_path) as frh:
                    c_nonsense = 0
                    c_real = 0
                    c_total = 0
                    for s in frh.readlines():
                        s = s.strip()
                        if s.startswith("-") == False and s.startswith("+") == False:
                            continue
                        try:
                            if nonsense(s):
                                c_nonsense += 1
                                # print(s)
                            else:
                                # print(s)
                                c_real += 1
                            c_total += 1
                        except:
                            pass
                    # print(c_total)
                    if c_nonsense + c_real == 0:
                        continue
                    else:
                        res[ifilename] = float(c_real) / (c_nonsense + c_real)
                        cnt += 1
                        print("{}: {} {}".format(cnt, ifilename, res[ifilename]))
            except:
                continue
    js_fwh = open(file_gen, 'w', encoding='utf-8')
    json.dump(res, js_fwh)
def lookup_tokens(token_text):
    # Relies on `self` from the enclosing scope (likely defined inside a method).
    add_token = False
    if self._py_dictionary.meaning(token_text) is not None:
        print("Py Dictionary", token_text)
        add_token = True
    else:
        try:
            if not nonsense(token_text):
                print("Not Nonsense", token_text)
                add_token = True
        except:
            pass
    if add_token:
        self._allowed_tokens.add(token_text)
        self._allowed_to_insert.append({"token": token_text})
    self._tried_tokens.add(token_text)
    self._tried_to_insert.append({"token": token_text})
async def on_message(self, message):
    """
    Event message

    Args:
        message (discord.Message): Message content
    """
    # * Vars
    now = datetime.now()
    if message.author.id == self.bot.user.id:
        return
    if message.channel.type is discord.ChannelType.private:
        try:
            if nonsense(message.content):
                await message.channel.send(
                    '<:Denied:807575178561191976> - Please provide a good text!'
                )
            else:
                jmail_channel = self.bot.get_channel(Guild.mailling_channel)
                mail_embed = discord.Embed(
                    title=f'New mail from {message.author.name}',
                    color=0x2c2f33)
                mail_embed.add_field(name='Content:',
                                     value=f'```{message.content}```',
                                     inline=False)
                mail_embed.set_footer(
                    text=now.strftime("%m/%d/%Y, %H:%M:%S"),
                    icon_url=message.author.avatar_url)
                await jmail_channel.send(embed=mail_embed)
        except ValueError:
            await message.channel.send(
                '<:Denied:807575178561191976> - Error, your text is too smart.'
            )
    else:
        pass
def check_if_obfuscated(tmp_prog_arg):
    print("start to check obfuscated")
    extract_header_file(tmp_prog_arg)
    tmp_header_file = open(tmp_prog_arg.extract_header_file_path)
    tmp_nonsense = 0
    tmp_real = 0
    for tmp_s in tmp_header_file.readlines():
        # if tmp_s.find("*/ ") == -1:
        #     continue
        # tmp_s = tmp_s[tmp_s.find("*/ ") + 3:]
        # if tmp_s.find("// ") == -1:
        #     print("header file extract not correct")
        #     sys.exit()
        # tmp_s = tmp_s[:tmp_s.find("// ")]
        tmp_s = tmp_s.replace("\n", "")
        for tmp_i in range(10):
            tmp_s = tmp_s.replace(str(tmp_i), '')
        if len(tmp_s) <= 6:
            continue
        # print(tmp_s)
        # print(len(tmp_s))
        if nonsense(tmp_s):
            tmp_nonsense = tmp_nonsense + 1
        else:
            tmp_real = tmp_real + 1
    if tmp_nonsense + tmp_real == 0:
        print("didn't find useful name")
        sys.exit()
    tmp_result = float(tmp_real) / (tmp_nonsense + tmp_real)
    if tmp_result < 0.9:
        print("this application is obfuscated")
        tmp_prog_arg.my_app_info.is_obfuscated = True
    else:
        tmp_prog_arg.my_app_info.is_obfuscated = False
ct_numbers = []
for letter in ct:
    number = ord(letter) - 65
    ct_numbers.append(number)

a_inv = []
b = []
for i in range(26):
    b.append(i)
for i in range(1, 26, 2):
    inv = modinv(i, 26)
    if inv is not None:
        a_inv.append(inv)

pt = ""
for i in b:
    for j in a_inv:
        for number in ct_numbers:
            if number >= 0:
                pt_letter = (j * (number - i)) % 26
                pt += str(chr(pt_letter + 65))
            else:
                pt += " "
        if not nonsense(pt):
            print("PT: {} -> A_inv: {}, B: {}".format(pt, j, i))
        pt = ""
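# Note: modinv is not defined in the snippet above. A minimal sketch of the
# modular-inverse helper it appears to expect (hypothetical), returning None
# when no inverse exists:
def modinv(a, m):
    # Brute-force search for x such that (a * x) % m == 1
    a = a % m
    for x in range(1, m):
        if (a * x) % m == 1:
            return x
    return None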
async def check_manually(message):
    # This command is used to test new antispammer AI functions
    user, txt = await aio_get_user(message, allow_self=True)
    if not user:
        return
    user_id = user['user_id']
    name = user['first_name']
    user_pics = await tbot(GetUserPhotosRequest(
        int(user['user_id']), offset=0, max_id=0, limit=100))
    if user['last_name']:
        name += user['last_name']

    num = 0
    text = "User " + await user_link_html(user['user_id'])
    text += "\nName: " + name
    text += "\nID: <code>" + str(user['user_id']) + '</code>'
    text += '\n'

    gbanned = mongodb.blacklisted_users.find_one({'user': user_id})
    if gbanned:
        text += "\n<b>Warn! User gbanned in SophieBot!</b>"
        text += f"\nDate: <code>{gbanned['date']}</code>"
        text += f"\nReason: <code>{gbanned['reason']}</code>"
        text += '\n'
        num += 999
    else:
        text += "\nUser not gbanned in SophieBot"

    api_url = "https://api.unifiedban.solutions/blacklist/check/" + str(user_id)
    ubanned = requests.get(api_url, headers={'Authorization': CONFIG['advanced']['utoken']})
    if ubanned.text == '{"Error": "No data"}':
        text += "\nUser not ubanned."

    if user['first_name'].replace(' ', '').isdigit():
        text += "\n<b>Warn! User has a name with only numbers!</b>"
        num += 80
    if user['first_name'].lower() in NAMES:
        text += "\n<b>Warn! User has a real name (mostly spammers try to look like real humans)!</b>"
        num += 75
    if user_pics and len(user_pics.photos) == 1:
        text += "\n<b>Warn! User has only 1 display picture!</b>"
        num += 40
    if user_pics and len(user_pics.photos) == 0:
        text += "\n<b>Warn! User doesn't have any DP!</b>"
        num += 25

    try:
        check = nonsense(name)
        if check is True:
            text += "\n<b>Warn! User has a nonsense name!</b>"
            num += 85
        else:
            text += "\nUser has a normal name"
    except ValueError:
        text += "\nName too short to analyse it"

    # Counterweight
    if '#' in name:
        text += "\nUser has a hashtag in name, mostly only real users have it"
        num -= 20
    if "☭" in name:
        text += "\nGood soviet boi."
        num -= 20
    if "🌈" in name:
        text += "\nGei detected."
        num -= 20
    if "🦊" in name:
        text += "\nHa, this guy is a fox lover."
        num -= 20
    for owo in COUNTRY_EMOJIS:
        if owo in name:
            text += "\nHa, this guy loves their own country"
            num -= 20
            break

    # text += "\n\nDebug: Real suspicion number: " + str(num)
    if num > 100:
        num = 100
    if num < 0:
        num = 0

    text += '\n\n<b>Suspicion: </b><code>' + str(num) + "%</code>"
    await message.reply(str(text))
for url in urls_list:
    try:
        response = requests.get(url).text
        for match in get_varnames(response):
            params.add(match)
    except:
        pass
else:
    input_data = ' '.join(read_in())
    for match in get_varnames(input_data):
        params.add(match)

if args.smart:
    print('using smart detection')
    for param in params:
        # pattern matches only alpha because nostril only works on alpha
        regex = re.compile('[^a-zA-Z]')
        nostril_safe_param = regex.sub('', param)
        if len(nostril_safe_param) > 6:
            if not nonsense(nostril_safe_param):
                print(param)
        else:
            # print everything under 7 chars, nostril won't work on anything shorter
            print(param)
else:
    for param in params:
        print(param)
def classifier(stream_strings):
    """Rule-based classification for the input string

    Args:
        stream_strings (str): A string to be processed

    Returns:
        str: category/class of the string
    """
    # Augment the string with splits, eliminate extra spaces and lowercase it
    stream_strings_org = stream_strings
    stream_strings = stream_strings.lower()
    stream_strings = ' '.join(stream_strings.split()).replace(',', '')
    stream_strings_space = stream_strings.split(' ')
    stream_strings_mod = list(
        set(stream_strings_space + [stream_strings] + [stream_strings_org]))

    # Variables for rule-based matching
    ents = []
    in_vocab = []
    poss = {}
    alphas = {}
    like_nums = {}

    # Iterate through the string after converting it into a spaCy doc
    for token in nlp(' '.join(stream_strings_org.split()).lower()):
        poss[token.text] = str(token.pos_)           # Part of speech for the token
        alphas[token.text] = str(token.is_alpha)     # Check if token is alphabetic
        like_nums[token.text] = str(token.like_num)  # Check if token is numeric
        if nlp.vocab.strings[token.text] in nlp.vocab:
            in_vocab.append(token.text)              # Check if token is present in vocabulary

    for stream_string in stream_strings_mod:
        # Check the entity labels
        doc = nlp(stream_string)
        for ent in doc.ents:
            if ent.label_ not in ents:
                ents.append(ent.label_)
    ents = list(set(ents))

    # Rule for serial number
    if (len(ents) == 0 and len(in_vocab) == 0) or nonsense(
            ''.join(stream_strings_org.lower().split()) * 3) == True or ''.join(
                stream_strings_org.split()).isnumeric() == True:
        return 'serial_number'

    # Rule for physical good / product
    if (len(nlp(' '.join(stream_strings_org.split()))) == len(in_vocab)) and (
            len(ents) == 0 or 'PRODUCT' in ents):
        return 'product'

    # Rule for address
    if ('CARDINAL' in ents) or ('FAC' in ents):
        return 'address'

    # Rule for geographic location
    if ('GPE' in ents or 'LOC' in ents) and ('ORG' not in ents):
        return 'location'

    # Rule for company name
    if 'ORG' in ents:
        return 'company_name'

    return 'product'
from nostril import nonsense

INPUT_FILENAME = "brute_force_results.txt"
OUTPUT_FILENAME = "meaningful_results.txt"

with open(INPUT_FILENAME) as input_file, open(OUTPUT_FILENAME, "w") as output_file:
    for line in input_file:
        if not nonsense(line):
            output_file.write(line)
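# nonsense() raises ValueError when a line is too short or has too few letters
# to score (see the test snippet earlier in this section), so a guarded variant
# of the same loop might look like this (a sketch reusing the INPUT_FILENAME /
# OUTPUT_FILENAME constants defined above):
with open(INPUT_FILENAME) as input_file, open(OUTPUT_FILENAME, "w") as output_file:
    for line in input_file:
        try:
            if not nonsense(line):
                output_file.write(line)
        except ValueError:
            # line too short or too few letters for the classifier; skip it
            continue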
def run(self, record):
    '''
    The list of features will be:

    - The number of domain parts, for example, www.google.com is 3.
    - The overall length in characters.
    - The length of the longest domain part.
    - The length of the TLD, e.g. .online or .download is longer than .com.
    - The randomness level of the domain.
    '''
    if 'analysers' not in record:
        record['analysers'] = []

    x_samples = []
    Y_samples = []

    for analyser in record['analysers']:
        if analyser['analyser'] != 'WordSegmentation':
            continue

        for domain, segments in analyser['output'].items():
            # Remove wildcard domain
            domain = re.sub(r'^\*\.', '', domain)
            parts = domain.split('.')
            x = []

            # Compute the number of domain parts
            x.append(len(parts))
            # Compute the length of the whole domain
            x.append(len(domain))

            # Compute the length of the longest domain part
            longest = ''
            for part in parts:
                if len(part) > len(longest):
                    longest = part
            x.append(len(longest))

            # Compute the length of the TLD
            x.append(len(parts[-1]))

            randomness_count = 0
            # The nostril package which we are using to detect nonsense words
            # in the domain only returns a boolean verdict, so maybe we need to
            # think about how we want to quantify this
            for w in segments:
                try:
                    if len(w) >= FeaturesGenerator.NOSTRIL_LENGTH_LIMIT and nonsense(w):
                        randomness_count += 1
                except ValueError:
                    continue
            x.append(randomness_count / len(segments))

            x_samples.append(x)
            Y_samples.append(True if 'usual_suspect' in record else False)

        break

    record['analysers'].append({
        'analyser': type(self).__name__,
        'output': x_samples,
    })

    return record
def chat():
    content = request.json
    history = content['history']
    query = content['query']
    task_name = content['task_name']
    lang_id = content['lang_id']
    top_p = content['top_p']
    temp = content['temp']
    rept = content['rept']
    lon = content['lon']
    lat = content['lat']

    if type(history) == str or len(history) == 0 or history[0] == "":
        history = []

    query = translate(query, lang_id, "en",
                      auto=True if task_name == "AUTOMODE" else False)
    emoji_user = get_emoji_score([query])
    history.append(query)

    if task_name == "CovidQA":  # COVID QA
        if len(query) <= 20 or nonsense(query):
            resp = "Your input doesn't make sense, input something more meaningful"
        else:
            r2 = requests.get('https://covid19api.emos.ai/api/v1/summary/',
                              params={'q': query})
            if str(r2) == "<Response [200]>":  # request succeeded
                resp = eval(r2.json()[0]['data'])["extractive"]
            else:
                resp = "This service is currently not available."
        meta = []
        viz_meta = []
        task_id = 20
    elif task_name == "debunker":  # COVID DEBUNKER
        if len(query) <= 20 or nonsense(query):
            resp = "Your input doesn't make sense, input something more meaningful"
        else:
            ppl = get_perplexity(query)
            print(ppl)
            if float(ppl) > 170.:  # threshold to be tuned
                resp = "This claim is likely to be fake"
            else:
                resp = "This claim is likely to be real"
        meta = []
        viz_meta = []
        task_id = 16
    else:
        resp, meta, task_name, task_id, viz_meta = generate(
            history=history,
            task_name=task_name,
            meta_seed=1,
            top_p=float(top_p),
            temperature=float(temp),
            repetion=float(rept),
            lon=float(lon),
            lat=float(lat))

    toxic_ = False
    if task_name == "Neural":
        toxic_resp = get_toxic_score(query, resp)
        if toxic_resp != "":
            resp = toxic_resp
            toxic_ = True

    history.append(resp)
    resp = translate(resp, "en", lang_id,
                     auto=True if task_name == "AUTOMODE" else False)
    emoji_resp = get_emoji_score([resp])

    jsonFormat = {
        'history': history,
        'response': resp,
        'user_emoji': emoji_user,
        'resp_emoji': emoji_resp,
        'toxic': toxic_,
        'task_name': task_name,
        'task_id': task_id,
        'meta': meta,
        'viz_meta': viz_meta,
        "lon": lon,
        "lat": lat
    }

    with open("data/conversation_history/" + "{}.json".format(timeStamped("conv")),
              "w", encoding="utf-8") as f:
        json.dump(jsonFormat, f, indent=4)

    return jsonify(jsonFormat)  # last one is to allow CORS [important]