def checkLang(myFile, verbose):
    """Detect the language of a file via detectlanguage.com.

    The file is sent in consecutive chunks of ``detectRows`` lines. After
    each chunk the loop stops when the service gives a reliable answer that
    is listed in ``prefLangs``, when the file has no more lines to send, or
    when more than ``maxTrys`` chunks have been tried.

    Relies on the module-level names ``detectlanguage``, ``detectRows``,
    ``prefLangs``, ``maxTrys``, ``convertText`` and ``langName``.

    Args:
        myFile: Path of the file to examine (passed to ``open``).
        verbose: Forwarded unchanged to ``convertText``.

    Returns:
        The language code of the last answer as a ``str``, or ``"xx"`` when
        the last answer was unreliable or nothing could be sent at all.

    Raises:
        SystemExit: With code 7 when the account status is ``"SUSPENDED"``.
    """
    # Single-argument print(...) behaves the same with the Python 2 print
    # statement and the Python 3 print function.
    status = detectlanguage.user_status()  # account status at detectlanguage.com
    if status['status'] == "SUSPENDED":
        print("*** Account at detectlanguage.com is suspended")
        print(" Run %s -d to see status" % sys.argv[0])
        print(" Quitting...\n")
        sys.exit(7)

    # The ``with`` block closes the file; no explicit close() is needed.
    with open(myFile) as theFile:
        fileLines = sum(1 for _ in theFile)  # number of lines in file

    # Defaults so that an empty file (no request is ever sent) yields "xx"
    # instead of failing on an unbound name after the loop.
    langCode = "xx"
    result = None
    tryNumber = 0  # number of chunks already sent

    while True:
        firstRow = tryNumber * detectRows
        lastRow = (tryNumber + 1) * detectRows

        if firstRow >= fileLines:
            print("*** File only has %d lines. No more lines to send. Accepting answer" % fileLines)
            break

        with open(myFile) as theFile:
            head = list(islice(theFile, firstRow, lastRow))  # select rows from file

        # convert all strange characters, remove special characters and so on
        text = convertText(head, verbose)
        print("--- Sending rows %d-%d to detectlanguage.com" % (firstRow, lastRow))
        result = detectlanguage.detect(text)

        if result[0]['isReliable']:
            langCode = str(result[0]['language'])
            print("--- Got %s - %s" % (langCode, langName(langCode)))
            if langCode in prefLangs:
                # Reliable answer in a preferred language: done.
                break
            print("*** Not one of your prefered languages")
        else:
            langCode = "xx"
            print("*** Got unreliable answer. Confidence is %s" % str(result[0]['confidence']))

        tryNumber += 1
        if tryNumber > maxTrys:
            print("*** Max number of trys reached. Accepting answer")
            break

    if langCode == "xx":
        print("detectlanguage.com can't determine language code")
    else:
        print("detectlanguage.com says languagecode is %s" % langCode)
        confidence = result[0]['confidence']
        print("detectlanguage.com says confidence is %s" % confidence)

    return langCode
import detectlanguage

# NOTE(review): the API key is embedded in the source file; consider
# loading it from configuration or the environment instead.
detectlanguage.configuration.api_key = "609c401c57d4180ddbf29d57ad9b273c"

# Print the result of detect() for a Spanish-language sample.
spanish_result = detectlanguage.detect("Buenos dias señor")
print(spanish_result)

# Print the result of simple_detect() for a Devanagari-script sample.
devanagari_result = detectlanguage.simple_detect("तक को बनाया स्टार")
print(devanagari_result)

# Print the account status returned by the service.
account_status = detectlanguage.user_status()
print(account_status)
if len(donnes.split()) > 5: doc[champ] = donnes else: print("file ignored ", fil, champ) elif bre['label'] in OT: file = [truc for truc in lstUnk if bre['label'] == truc.split('-')[1].replace('.txt', "") and "Consistent" not in truc][0] with open(file, 'r', encoding='utf8') as fichier: contenu = fichier.read() if len(contenu) > 0: phrase = contenu.split('.')[0] if len(phrase) > 0 and len(phrase.split()) > 5: # more than 5 words try: if detectlanguage.user_status()['requests'] < detectlanguage.user_status() ['daily_requests_limit'] -1: lang = detectlanguage.simple_detect(phrase) else: time.sleep(86400) # wait ONE day! lang = detectlanguage.simple_detect(phrase) except: time.sleep(2) try: if detectlanguage.user_status()['requests'] < detectlanguage.user_status()[ 'daily_requests_limit'] - 1: lang = detectlanguage.simple_detect(phrase) else: time.sleep(86400) # wait ONE day! lang = detectlanguage.simple_detect(phrase) except: lang = ''
def test_user_status(self):
    """Check that ``user_status()`` reports the status ``'ACTIVE'``."""
    reported_status = detectlanguage.user_status()['status']
    eq_('ACTIVE', reported_status)
import pandas as pd import pyorc DETECT_LANGUAGE_API_KEY = "c6e79e234e81e160db81454d80ae611d" detectlanguage.configuration.api_key = DETECT_LANGUAGE_API_KEY ORC_FILE = "out/spambase_{}.orc" source_file_path = "/home/quentin/Dev/Spam-detector-pipeline/validation.csv" # sys.argv[1] if not os.path.isfile(source_file_path): print(f"Wrong file path {source_file_path}, exit script.") exit() max_requests = 1000 # int(sys.argv[2]) if len(sys.argv) > 2 else 1000 offset = 0 # int(sys.argv[3]) if len(sys.argv) > 3 else 0 dl_user_status = detectlanguage.user_status() available_requests = dl_user_status['daily_requests_limit'] - dl_user_status[ 'requests'] if available_requests == 0: print("Quota of requests at DetectLanguage exhausted for today.") exit() df = pd.read_csv(source_file_path, header=0) nb_lines = min(available_requests, len(df) - offset, max_requests) df = df[offset:offset + nb_lines].copy().reset_index(drop=True) response = detectlanguage.detect(df["text"].values.tolist()) first_languages = list( map( lambda x: x[0] if x else {
def detectors_status():
    """Return whatever ``detectlanguage.user_status()`` returns, unchanged."""
    account_status = detectlanguage.user_status()
    return account_status
import detectlanguage

# NOTE(review): the API key is embedded in the source file; consider
# loading it from configuration or the environment instead.
detectlanguage.configuration.api_key = "7e8ac16dc6ab196f2449de5fd7d7f70b"

# Enable secure mode (SSL) if you are passing sensitive data
# detectlanguage.configuration.secure = True

# Print the simple_detect() result for a Spanish-language sample.
spanish_result = detectlanguage.simple_detect("Buenos dias señor")
print(spanish_result)

# Print the detect() results for an Arabic-script sample and a one-word sample.
arabic_script_result = detectlanguage.detect("سلام گلم")
print(arabic_script_result)

single_word_result = detectlanguage.detect("Oui")
print(single_word_result)

# The account status is requested but its result is not used.
detectlanguage.user_status()
# Load configuration with open('config.json', 'r') as f: config = json.load(f) detectlanguage_api_key = config['detectlanguage_api_key'] database_host = config['database_host'] database_name = config['database_name'] twitterStatusCol = config['source_box'] + "_twitterStatus" client = MongoClient('mongodb://' + database_host + ':27017/') db = client[database_name] detectlanguage.configuration.api_key = detectlanguage_api_key while True: try: if detectlanguage.user_status()['requests'] >= detectlanguage.user_status()['daily_requests_limit']: logging.debug("Number of requests over daily limit.") time.sleep(60) statuses = db[twitterStatusCol].find({ "language_detections.language": { "$exists": False } }) if statuses: count = 0 batch_request = [] batch_status = [] for twitterStatus in statuses: if count >= 500: logging.debug("Processing batch ...") detections = detectlanguage.detect(batch_request) if len(detections) != 500: