コード例 #1
0
ファイル: myFunctions.py プロジェクト: jonsag/pySubs
def checkLang(myFile, verbose):
    """Detect the language of a text file through detectlanguage.com.

    Consecutive chunks of ``detectRows`` lines are sent until a reliable answer
    that is one of ``prefLangs`` comes back, the file runs out of lines, or
    more than ``maxTrys`` chunks have been tried.

    Args:
        myFile: path of the file to examine.
        verbose: passed straight through to ``convertText``.

    Returns:
        The language code of the last answer received, or "xx" when that
        answer was unreliable or when no chunk could be sent at all
        (for example an empty file).

    Exits the process with status 7 when the detectlanguage.com account is
    reported as suspended.
    """
    # Single-argument print(...) is valid under both Python 2 and Python 3.
    status = detectlanguage.user_status()  # account status at detectlanguage.com
    if status['status'] == "SUSPENDED":
        print("*** Account at detectlanguage.com is suspended")
        print("    Run %s -d to see status" % sys.argv[0])
        print("    Quitting...\n")
        sys.exit(7)

    # Read the file once instead of reopening and re-scanning it per chunk.
    with open(myFile) as theFile:
        lines = theFile.readlines()
    fileLines = len(lines)

    # Defaults so that a file with no lines (nothing ever sent) does not
    # leave these names unbound when they are reported below.
    langCode = "xx"
    result = None
    tryNumber = 0  # number of chunks sent so far

    while True:
        firstRow = tryNumber * detectRows
        lastRow = firstRow + detectRows
        if firstRow >= fileLines:
            print("*** File only has %d lines. No more lines to send. Accepting answer" % fileLines)
            break

        head = lines[firstRow:lastRow]  # rows for this attempt
        text = convertText(head, verbose)  # clean up the text before sending

        print("--- Sending rows %d-%d to detectlanguage.com" % (firstRow, lastRow))
        result = detectlanguage.detect(text)

        if result[0]['isReliable']:
            langCode = str(result[0]['language'])
            print("--- Got %s - %s" % (langCode, langName(langCode)))

            # A reliable answer only ends the search if it is a preferred language.
            if any(lang == langCode for lang in prefLangs):
                break
            print("*** Not one of your prefered languages")
        else:
            langCode = "xx"
            print("*** Got unreliable answer. Confidence is %s" % str(result[0]['confidence']))

        tryNumber += 1
        if tryNumber > maxTrys:
            print("*** Max number of trys reached. Accepting answer")
            break

    if langCode == "xx":
        print("detectlanguage.com can't determine language code")
    else:
        print("detectlanguage.com says languagecode is %s" % langCode)

    # No confidence to report when nothing was ever sent.
    if result:
        confidence = result[0]['confidence']
        print("detectlanguage.com says confidence is %s" % confidence)
    return langCode
コード例 #2
0
import detectlanguage

# Configure the client before issuing any request.
# NOTE(review): the API key is hard-coded in source; consider loading it from
# the environment or a config file instead.
detectlanguage.configuration.api_key = "609c401c57d4180ddbf29d57ad9b273c"

# Full detection result for a Spanish sample.
spanish_detections = detectlanguage.detect("Buenos dias señor")
print(spanish_detections)

# Simple detection result for a Hindi sample.
hindi_result = detectlanguage.simple_detect("तक को बनाया स्टार")
print(hindi_result)

# Account status as reported by the service.
account_status = detectlanguage.user_status()
print(account_status)
コード例 #3
0
                    if len(donnes.split()) > 5:
                        doc[champ] = donnes
            else:
                print("file ignored ", fil, champ)

    elif bre['label'] in OT:
        file = [truc for truc in lstUnk if
                bre['label'] == truc.split('-')[1].replace('.txt', "") and "Consistent" not in truc][0]
        with open(file, 'r', encoding='utf8') as fichier:
            contenu = fichier.read()
        if len(contenu) > 0:
            phrase = contenu.split('.')[0]

            if len(phrase) > 0 and len(phrase.split()) > 5:  # more than 5 words
                try:
                    if detectlanguage.user_status()['requests'] < detectlanguage.user_status() ['daily_requests_limit'] -1:
                        lang = detectlanguage.simple_detect(phrase)
                    else:
                        time.sleep(86400) # wait ONE day!
                        lang = detectlanguage.simple_detect(phrase)
                except:
                    time.sleep(2)
                    try:
                        if detectlanguage.user_status()['requests'] < detectlanguage.user_status()[
                            'daily_requests_limit'] - 1:
                            lang = detectlanguage.simple_detect(phrase)
                        else:
                            time.sleep(86400)  # wait ONE day!
                            lang = detectlanguage.simple_detect(phrase)
                    except:
                        lang = ''
コード例 #4
0
	def test_user_status(self):
		"""The 'status' field returned by user_status() should be 'ACTIVE'."""
		status = detectlanguage.user_status()['status']
		eq_('ACTIVE', status)
コード例 #5
0
 def test_user_status(self):
     """The 'status' field returned by user_status() should be 'ACTIVE'."""
     status = detectlanguage.user_status()['status']
     eq_('ACTIVE', status)
コード例 #6
0
import pandas as pd
import pyorc

# NOTE(review): the API key is committed in source; consider loading it from
# the environment or a config file instead.
DETECT_LANGUAGE_API_KEY = "c6e79e234e81e160db81454d80ae611d"
detectlanguage.configuration.api_key = DETECT_LANGUAGE_API_KEY
ORC_FILE = "out/spambase_{}.orc"

source_file_path = "/home/quentin/Dev/Spam-detector-pipeline/validation.csv"  # sys.argv[1]
if not os.path.isfile(source_file_path):
    print(f"Wrong file path {source_file_path}, exit script.")
    exit()

max_requests = 1000  # int(sys.argv[2]) if len(sys.argv) > 2 else 1000
offset = 0  # int(sys.argv[3]) if len(sys.argv) > 3 else 0

# Remaining quota for today = daily limit minus requests already made.
dl_user_status = detectlanguage.user_status()
available_requests = dl_user_status['daily_requests_limit'] - dl_user_status[
    'requests']
# Use <= 0 rather than == 0: if the request counter is past the limit, the
# difference is negative, and a negative nb_lines would turn the slice below
# into df[offset:-k], sending nearly the whole frame.
if available_requests <= 0:
    print("Quota of requests at DetectLanguage exhausted for today.")
    exit()

df = pd.read_csv(source_file_path, header=0)
# Never send more rows than the quota, the remaining data, or the configured cap.
nb_lines = min(available_requests, len(df) - offset, max_requests)

df = df[offset:offset + nb_lines].copy().reset_index(drop=True)

# Batch detection: one request carrying the whole "text" column as a list.
response = detectlanguage.detect(df["text"].values.tolist())
first_languages = list(
    map(
        lambda x: x[0] if x else {
コード例 #7
0
def detectors_status():
    """Return whatever ``detectlanguage.user_status()`` reports for the account."""
    account_status = detectlanguage.user_status()
    return account_status
コード例 #8
0
ファイル: detectLang.py プロジェクト: hodakermani/utils
import detectlanguage

# NOTE(review): the API key is hard-coded in source; consider loading it from
# the environment or a config file instead.
detectlanguage.configuration.api_key = "7e8ac16dc6ab196f2449de5fd7d7f70b"

# Secure mode (SSL) is left disabled here; uncomment when sending sensitive data.
# detectlanguage.configuration.secure = True

# Simple detection result for a Spanish sample.
spanish_result = detectlanguage.simple_detect("Buenos dias señor")
print(spanish_result)

# Full detection result for a Persian sample.
persian_detections = detectlanguage.detect("سلام گلم")
print(persian_detections)

# Full detection result for a very short French sample.
french_detections = detectlanguage.detect("Oui")
print(french_detections)

# The account status is requested but its result is discarded (not printed).
detectlanguage.user_status()
コード例 #9
0
    # Load configuration
    with open('config.json', 'r') as f:
        config = json.load(f)
        detectlanguage_api_key = config['detectlanguage_api_key']
        database_host = config['database_host']
        database_name = config['database_name']
        twitterStatusCol = config['source_box'] + "_twitterStatus"

    client = MongoClient('mongodb://' + database_host + ':27017/')
    db = client[database_name]

    detectlanguage.configuration.api_key = detectlanguage_api_key

    while True:
        try:
            if detectlanguage.user_status()['requests'] >= detectlanguage.user_status()['daily_requests_limit']:
                logging.debug("Number of requests over daily limit.")
                time.sleep(60)

            statuses = db[twitterStatusCol].find({ "language_detections.language": { "$exists": False } })

            if statuses:
                count = 0
                batch_request = []
                batch_status = []
                for twitterStatus in statuses:
                    if count >= 500:
                        logging.debug("Processing batch ...")
                        detections = detectlanguage.detect(batch_request)

                        if len(detections) != 500: