Example #1
def list_workspaces(iam_apikey, version, url):
    authenticator = IAMAuthenticator(iam_apikey)
    if WATSON_SERVICE != 'nlc':
        c = AssistantV1(version=version, authenticator=authenticator)
        c.set_service_url(url)
        return c.list_workspaces()
    else:
        c = NaturalLanguageClassifierV1(authenticator)
        c.set_service_url(url)
        return c.list_classifiers()
Example #2
def createClassifier():
    authenticator = IAMAuthenticator('API_KEY')
    natural_language_classifier = NaturalLanguageClassifierV1(authenticator=authenticator)
    natural_language_classifier.set_service_url('SERVICE_URL')
    with open('./labels.csv', 'rb') as training_data:
        classifier = natural_language_classifier.create_classifier(
            training_data=training_data,
            training_metadata='{"name": "Classifier","language": "en"}'
        ).get_result()
    print(json.dumps(classifier, indent=2))
def checkWholeDoc(fulltext):
    #Authenticate API
    authenticator = IAMAuthenticator('API_KEY')
    natural_language_classifier = NaturalLanguageClassifierV1(
        authenticator=authenticator)
    natural_language_classifier.set_service_url('SERVICE_URL')

    # Process whole doc
    for i in fulltext:
        classes = natural_language_classifier.classify('35c0a4x769-nlc-127',
                                                       i).get_result()
        topClass = classes["top_class"]
        if topClass in FLAGGED_CLASSES:
            return True
    return False
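checkWholeDoc() and processLines() below both reference a FLAGGED_CLASSES constant that is never defined in these snippets. A minimal sketch of the assumed setup; the set literal is a guess inferred from the label names used in processLines(), not the original definition:

# Hypothetical: FLAGGED_CLASSES is not defined anywhere in this listing;
# the class names are inferred from processLines() further down, which
# collects "race", "color", and "address" labels.
FLAGGED_CLASSES = {"race", "color", "address"}

# Example call, assuming fulltext is an iterable of text chunks:
if checkWholeDoc(["The applicant's race was noted on the form."]):
    print("Document contains flagged content")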
Example #4
def main():
    output = {}

    # Load the classifier
    authenticator = IAMAuthenticator(
        'd61RIxMA4RwhewIoThcevX0xJqAo80mMyAnkjwNb8ePy')
    discovery = DiscoveryV1(version='2018-08-01', authenticator=authenticator)
    discovery.set_service_url(
        'https://gateway.watsonplatform.net/natural-language-classifier/api')
    classifier = NLClassifier(authenticator)

    # Take Input, can input multiple lines
    with open('../data/test_data.csv', newline='') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            text = row[0]
            # Clean the text
            for mark in del_marks:
                text = text.replace(mark, '')
            text = text.replace('&#039;', '\'')

            # Classify
            result = classifier.classify('90dbdex665-nlc-266',
                                         text).get_result()
            confidence = result['classes'][0]['confidence']
            normalized_confidence = (confidence - min_confidence) / (
                max_confidence - min_confidence)
            output[text] = normalized_confidence

    # Write output
    with open('output.txt', 'w') as out_file:
        for out in output:
            out_file.write(out + ', ' + str(output[out]) + '\n')
def delete_workspaces(iam_apikey, url, version, workspace_ids):
    """ Delete workspaces
    """
    authenticator = IAMAuthenticator(iam_apikey)

    for workspace_id in workspace_ids:
        if 'natural-language-classifier' in url:
            c = NaturalLanguageClassifierV1(authenticator=authenticator)
            c.set_service_url(url)
            c.delete_classifier(classifier_id=workspace_id)
        else:
            c = AssistantV1(version=version, authenticator=authenticator)
            c.set_service_url(url)
            c.delete_workspace(workspace_id=workspace_id)

    print('Cleaned up workspaces')
Example #6
def processAddresses(lines, lineNums):
    #Authenticate API
    authenticator = IAMAuthenticator('API_KEY')
    natural_language_classifier = NaturalLanguageClassifierV1(
        authenticator=authenticator
    )
    natural_language_classifier.set_service_url('SERVICE_URL')
    boxes = []
    for l in lineNums:
        for word in lines[l]["words"]:
            classes = natural_language_classifier.classify(
                '35c0a4x769-nlc-127', word["text"]).get_result()
            topClass = classes["top_class"]
            if topClass == "address":
                boxes.append(word["boundingBox"][0:2] + word["boundingBox"][4:6])
                boxes.append(word["boundingBox"][2:4] + word["boundingBox"][6:8])
    return boxes
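processAddresses() slices each word's boundingBox into two corner pairs, which only works if the OCR output stores eight values per word. A sketch of the assumed input shape; the coordinate layout is an assumption read off the slicing above:

# Hypothetical input: each word carries a flattened quadrilateral
# [x1, y1, x2, y2, x3, y3, x4, y4], one (x, y) pair per corner.
lines = [{
    "words": [{
        "text": "10 Downing Street",
        "boundingBox": [10, 20, 210, 20, 210, 40, 10, 40],
    }],
}]
boxes = processAddresses(lines, lineNums=[0])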
def training(f_name, api_key, model_name):
    authenticator = IAMAuthenticator(api_key)
    natural_language_classifier = NaturalLanguageClassifierV1(
        authenticator=authenticator)
    natural_language_classifier.set_service_url(
        'https://gateway.watsonplatform.net/natural-language-classifier/api')

    with open(os.path.join(os.path.dirname(__file__), f_name),
              'rb') as training_data:
        metadata = json.dumps({'name': model_name, 'language': 'en'})
        classifier = natural_language_classifier.create_classifier(
            training_metadata=metadata,
            training_data=training_data).get_result()

    subprocess.call(f'rm {f_name}', shell=True)

    return classifier
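A side note on the cleanup step in training(): shelling out to rm only works on POSIX systems and is fragile if f_name contains shell metacharacters. A portable equivalent:

import os

# Same cleanup as subprocess.call(f'rm {f_name}', shell=True),
# but portable and immune to shell-quoting issues:
os.remove(f_name)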
def processLines(lines):
    #Authenticate API
    authenticator = IAMAuthenticator('API_KEY')
    natural_language_classifier = NaturalLanguageClassifierV1(
        authenticator=authenticator)
    natural_language_classifier.set_service_url('SERVICE_URL')

    # Store lines that have a term
    labels = {"race": [], "color": [], "address": []}

    # Check each line in the document
    for i in range(len(lines)):
        classes = natural_language_classifier.classify(
            '35c0a4x769-nlc-127', lines[i]["text"]).get_result()
        topClass = classes["top_class"]
        if topClass in FLAGGED_CLASSES:
            # print(topClass, ":", lines[i]["text"])
            labels[topClass].append(i)
    return labels
Example #9
def delete_workspaces(iam_apikey, url, version, workspace_ids, auth_type):
    """ Delete workspaces
    """
    if auth_type == 'iam':
        authenticator = IAMAuthenticator(iam_apikey)
    elif auth_type == 'bearer':
        authenticator = BearerTokenAuthenticator(iam_apikey)
    else:
        raise ValueError(f'Unknown auth_type "{auth_type}"')

    for workspace_id in workspace_ids:
        if 'natural-language-classifier' in url:
            c = NaturalLanguageClassifierV1(authenticator=authenticator)
            c.set_service_url(url)
            c.delete_classifier(classifier_id=workspace_id)
        else:
            c = AssistantV1(version=version, authenticator=authenticator)
            c.set_service_url(url)
            c.delete_workspace(workspace_id=workspace_id)

    print('Cleaned up workspaces')
def delete():
    model_name = request.json['model_name']
    api_key = request.json['api_key']

    authenticator = IAMAuthenticator(api_key)
    natural_language_classifier = NaturalLanguageClassifierV1(
        authenticator=authenticator)
    natural_language_classifier.set_service_url(
        'https://gateway.watsonplatform.net/natural-language-classifier/api')

    classifier_id = None
    for c in natural_language_classifier.list_classifiers().get_result(
    )['classifiers']:
        if c['name'] == model_name:
            classifier_id = c['classifier_id']
    if not classifier_id:
        return {'message': "Classifier not found, check its name again"}
    else:
        natural_language_classifier.delete_classifier(classifier_id)
        res = {'message': f"Model '{model_name}' Deleted"}

    return jsonify(res)
Example #11
def func(args):
    classifier_name = ''
    classifier_description = ''
    classes = []
    language = 'en'
    counterexamples = []
    metadata = {}
    learning_opt_out = False

    authenticator = choose_auth(args)

    nlc = NaturalLanguageClassifierV1(authenticator=authenticator)
    nlc.set_service_url(args.url)

    classifier_name = "My Classifier"
    if args.classifier_name is not None:
        classifier_name = args.classifier_name
    metadata = {"name": classifier_name, "language": "en"}

    if args.trainingFile is not None:
        with open(args.trainingFile, 'rb') as training_data:
            classifier = nlc.create_classifier(
                training_data=training_data,
                training_metadata=json.dumps(metadata)).get_result()
    else:
        raise ValueError('A training file is required to create a classifier')

    resp = classifier
    # Poke the training status every SLEEP_INCRE secs
    sleep_counter = 0
    while sleep_counter < TIME_TO_WAIT:
        raw_resp = nlc.get_classifier(classifier_id=resp[CLASSIFIER_ID_TAG])
        resp = raw_resp.get_result()
        if resp['status'] == 'Available':
            print(json.dumps(resp, indent=4))  # double quoted valid JSON
            return
        sleep_counter += SLEEP_INCRE
        sleep(SLEEP_INCRE)
    raise TrainTimeoutException('NLC training timeout')
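Both func() variants on this page call a choose_auth(args) helper that is not included in the listing. A plausible sketch, modeled on the iam/bearer branching in the delete_workspaces() example (#9); the argument attribute names are assumptions:

from ibm_cloud_sdk_core.authenticators import (BearerTokenAuthenticator,
                                               IAMAuthenticator)


def choose_auth(args):
    # Hypothetical helper: mirrors the auth_type branching used in
    # delete_workspaces() above; attribute names are assumed.
    if args.auth_type == 'bearer':
        return BearerTokenAuthenticator(args.iam_apikey)
    return IAMAuthenticator(args.iam_apikey)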
import json
from ibm_watson import NaturalLanguageClassifierV1

service = NaturalLanguageClassifierV1(
    url='https://gateway.watsonplatform.net/natural-language-classifier/api',
    iam_apikey='JieYHJwBRgrd5Rl9R4q63d5DWvAuffdrRIj1jKhkfoAH')

classes = service.classify('8a423bx518-nlc-1830',
        'Airline lost my luggage twice in the same month, delayed my flight, ' + \
        'and didn\'t care about my stopover!').get_result()
print(json.dumps(classes, indent=2))
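The snippet above predates ibm-watson 4.0, whose constructor no longer accepts url and iam_apikey keyword arguments. A sketch of the same request on the current SDK, using the authenticator pattern the other examples on this page follow (the API key is a placeholder):

import json
from ibm_watson import NaturalLanguageClassifierV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Same request as above, written for ibm-watson >= 4.0
authenticator = IAMAuthenticator('YOUR_APIKEY')  # placeholder key
service = NaturalLanguageClassifierV1(authenticator=authenticator)
service.set_service_url(
    'https://gateway.watsonplatform.net/natural-language-classifier/api')

classes = service.classify(
    '8a423bx518-nlc-1830',
    'Airline lost my luggage twice in the same month').get_result()
print(json.dumps(classes, indent=2))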
Example #13
def func(args):
    in_df = None
    out_df = None
    test_column = UTTERANCE_COLUMN

    if args.test_column is not None:  # Test input has multiple columns
        test_column = args.test_column
        in_df = pd.read_csv(args.infile,
                            quoting=csv.QUOTE_ALL,
                            encoding=UTF_8,
                            keep_default_na=False)
        if test_column not in in_df:  # Look for target test_column
            raise ValueError(
                "Test column {} doesn't exist in file.".format(test_column))

        if args.merge_input:  # Merge rest of columns from input to output
            out_df = in_df
        else:
            out_df = in_df[[test_column]].copy()
            out_df.columns = [test_column]

    else:
        test_series = pd.read_csv(args.infile,
                                  quoting=csv.QUOTE_ALL,
                                  encoding=UTF_8,
                                  header=None,
                                  squeeze=True,
                                  keep_default_na=False)
        if isinstance(test_series, pd.DataFrame):
            raise ValueError('Unknown test column')
        # Test input has only one column and no header
        out_df = test_series.to_frame()
        out_df.columns = [test_column]

    # Initial columns for test output
    for column in test_out_header:
        out_df[column] = ''

    # Applied coroutines
    sem = asyncio.Semaphore(args.rate_limit)
    loop = asyncio.get_event_loop()

    authenticator = choose_auth(args)

    nlc = NaturalLanguageClassifierV1(authenticator=authenticator)
    nlc.set_service_url(args.url)

    tasks = (fill_df(out_df.loc[row_idx, test_column], row_idx, out_df,
                     args.workspace_id, nlc, sem)
             for row_idx in range(out_df.shape[0]))
    loop.run_until_complete(asyncio.gather(*tasks))

    loop.close()

    if args.golden_intent_column is not None:
        golden_intent_column = args.golden_intent_column
        if golden_intent_column not in in_df.columns:
            print("No golden intent column '{}' is found in input.".format(
                golden_intent_column))
        else:  # Add INTENT_JUDGE_COLUMN based on golden_intent_column
            out_df[INTENT_JUDGE_COLUMN] = \
                (in_df[golden_intent_column]
                    == out_df[PREDICTED_INTENT_COLUMN]).map(BOOL_MAP)
            out_df[SCORE_COLUMN] = \
                out_df[INTENT_JUDGE_COLUMN].map({'yes': 1, 'no': 0})

    if args.partial_credit_table is not None:
        credit_tables = parse_partial_credit_table(args.partial_credit_table)
        for row_idx in range(out_df.shape[0]):
            golden_intent = out_df.loc[row_idx,
                                       args.golden_intent_column].strip()
            predict_intent = out_df.loc[row_idx,
                                        PREDICTED_INTENT_COLUMN].strip()
            if golden_intent == predict_intent:
                out_df.loc[row_idx, SCORE_COLUMN] = 1.0
            elif golden_intent not in credit_tables or \
               predict_intent not in credit_tables[golden_intent]:
                out_df.loc[row_idx, SCORE_COLUMN] = 0
            else:
                out_df.loc[row_idx, SCORE_COLUMN] = \
                    credit_tables[golden_intent][predict_intent]

    save_dataframe_as_csv(df=out_df, file=args.outfile)
    print("Wrote test result file to {}".format(args.outfile))
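func() schedules a fill_df coroutine that is not included in this listing. A minimal sketch of what such a coroutine might look like, assuming it classifies one utterance and writes the top prediction back into the output frame; CONFIDENCE_COLUMN is an assumed name, while PREDICTED_INTENT_COLUMN appears in func() above:

async def fill_df(utterance, row_idx, out_df, classifier_id, nlc, sem):
    # Hypothetical sketch: the real fill_df is not shown in this listing.
    async with sem:  # cap concurrent requests at args.rate_limit
        loop = asyncio.get_event_loop()
        # The Watson SDK is synchronous, so run the blocking call in a thread.
        resp = await loop.run_in_executor(
            None, lambda: nlc.classify(classifier_id, utterance).get_result())
    out_df.loc[row_idx, PREDICTED_INTENT_COLUMN] = resp['top_class']
    out_df.loc[row_idx, CONFIDENCE_COLUMN] = resp['classes'][0]['confidence']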
def deleteClassifier():
    authenticator = IAMAuthenticator('API_KEY')
    natural_language_classifier = NaturalLanguageClassifierV1(
        authenticator=authenticator)
    natural_language_classifier.set_service_url('SERVICE_URL')
    natural_language_classifier.delete_classifier('35c0a4x769-nlc-127')
Example #15
import json
import os
import requests

from dotenv import load_dotenv
from flask import Flask, render_template, request

from ibm_watson import NaturalLanguageClassifierV1

DEBUG = True
app = Flask(__name__)

load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
classifier_id = os.environ.get("CLASSIFIER_ID")

NLC_SERVICE = NaturalLanguageClassifierV1()


@app.route('/')
def default():
    classifier_info = "cannot detect classifier"
    if NLC_SERVICE:
        classifier_info = ("classifier detected, using API: " +
                           NLC_SERVICE.service_url)
    return render_template('index.html',
                           classifier_info=classifier_info,
                           icd_code="",
                           icd_output="",
                           classifier_output="")
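The route above only renders the landing page. A sketch of the companion POST route such an app would need to actually run the classifier; the route name, form field, and the meaning of the template variables are assumptions:

@app.route('/classify', methods=['POST'])
def classify_text():
    # Hypothetical companion route: field and variable names are assumed.
    text = request.form.get('classifier_input', '')
    result = NLC_SERVICE.classify(classifier_id, text).get_result()
    return render_template('index.html',
                           classifier_info="classifier detected, using API: " +
                           NLC_SERVICE.service_url,
                           icd_code=result['top_class'],
                           icd_output=json.dumps(result, indent=2),
                           classifier_output=json.dumps(
                               result['classes'], indent=2))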

Example #16
def testing(df_test, model_name, api_key, email_id=[], emailed=False):
    authenticator = IAMAuthenticator(api_key)
    natural_language_classifier = NaturalLanguageClassifierV1(
        authenticator=authenticator)
    natural_language_classifier.set_service_url('https://gateway.watsonplatform.net/natural-language-classifier/api')
    
    classifier_id = None
    for c in natural_language_classifier.list_classifiers().get_result()['classifiers']:
        if c['name'] == model_name:
            classifier_id = c['classifier_id']
    if not classifier_id:
        return {'message':"Classifier not found, check its name again"}
    
    df_test = pd.DataFrame(df_test)
#     print(type(df_test), df_test.shape, df_test.columns)
    df_test.index = df_test.index.astype(int)
    df_test['text'] = df_test['text'].str[:1024]
    df_test['text'] = df_test['text'].apply(lambda x: ((x.encode("unicode_escape").decode("utf-8"))[:1024]).strip())
    
    y_true = df_test['label'].to_numpy()
    y_pred = []
    conf = []
    numbers = []
    while True:
        status = natural_language_classifier.get_classifier(classifier_id).get_result()['status']
        if status == 'Training' and not emailed:
            return {'message': "Classifier not trained, try when it's available"}
        if status == 'Training':
            time.sleep(10)  # poll instead of busy-waiting; requires 'import time'
            continue
        for num, example in zip(df_test['id'], df_test['text']):
            numbers.append(num)
            classes = natural_language_classifier.classify(classifier_id, example).get_result()
        #     print(classes)
            pred_label = classes['top_class']
            pred_conf = classes['classes'][0]['confidence']
        #     print(pred_label, pred_conf)
            y_pred.append(pred_label)
            conf.append(pred_conf)
        break

    y_pred = np.array(y_pred, dtype=object)
    numbers = np.array(numbers, dtype=object)
    conf = np.array(conf, dtype=np.float64)
    
    df_result = pd.DataFrame(columns=['real','pred','conf','id'])
    df_result['id'] = numbers
    df_result['pred'] = y_pred
    df_result['conf'] = conf
    df_result['real'] = y_true
    
    report = classification_report(y_true, y_pred, output_dict=True)
    report = pd.DataFrame(report).T
#     report = report[~report.index.isin(['accuracy', 'macro avg', 'weighted avg'])]
    report_txt = classification_report(y_true, y_pred)
    conf_mat = pd.crosstab(df_result['real'], df_result['pred'], rownames=['Actual'], colnames=['Pred'])
    acc = accuracy_score(y_true, y_pred)
    
    global do_ml_flow
    if do_ml_flow:
        log_metric("Test Accuracy", acc)
        
        df_result.to_excel(f'tmp/df_result -- {model_name}.xlsx', index=False)
        report.to_excel(f'tmp/report -- {model_name}.xlsx')
        with open(f'tmp/report -- {model_name}.txt', 'w') as f:
            print(report_txt, file=f)
        conf_mat.to_excel(f'tmp/conf_mat -- {model_name}.xlsx')
    
        log_artifact(f'tmp/df_result -- {model_name}.xlsx')
        log_artifact(f'tmp/report -- {model_name}.xlsx')
        log_artifact(f'tmp/report -- {model_name}.txt')
        log_artifact(f'tmp/conf_mat -- {model_name}.xlsx')
        
        subprocess.call(f'rm "tmp/df_result -- {model_name}.xlsx"', shell=True)
        subprocess.call(f'rm "tmp/report -- {model_name}.xlsx"', shell=True)
        subprocess.call(f'rm "tmp/report -- {model_name}.txt"', shell=True)
        subprocess.call(f'rm "tmp/conf_mat -- {model_name}.xlsx"', shell=True)
        
    res = {}
    res['df_res'] = df_result.to_dict()
    res['acc'] = acc
    res['report'] = report.to_dict()
    res['report_txt'] = report_txt
    res['conf_mat'] = conf_mat.to_dict()

    if not emailed:
        return res

    with open(f'watson_results/{model_name}.json', 'w') as fp:
        json.dump(res, fp)
Example #17
# natural_language_classifier = NaturalLanguageClassifierV1(
#     authenticator=authenticator
# )

# natural_language_classifier.set_service_url('SERVICE_URL')

# status = natural_language_classifier.get_classifier('35ba1fx766-nlc-138').get_result()
# print (json.dumps(status, indent=2))

##################### Run classifier
import json
from ibm_watson import NaturalLanguageClassifierV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

authenticator = IAMAuthenticator('API_KEY')
natural_language_classifier = NaturalLanguageClassifierV1(
    authenticator=authenticator)

natural_language_classifier.set_service_url('SERVICE_URL')

classes = natural_language_classifier.classify('35ba1fx766-nlc-138',
                                               'Athens').get_result()
print(json.dumps(classes, indent=2))

##################### Delete classifier
# from ibm_watson import NaturalLanguageClassifierV1
# from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# authenticator = IAMAuthenticator('API_KEY')
# natural_language_classifier = NaturalLanguageClassifierV1(
#     authenticator=authenticator
# )
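The delete section breaks off before the actual call. Based on the deleteClassifier() example earlier on this page, the missing steps would be the following (kept commented out, like the surrounding block):

# natural_language_classifier.set_service_url('SERVICE_URL')
# natural_language_classifier.delete_classifier('35ba1fx766-nlc-138')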
import json
from ibm_watson import NaturalLanguageClassifierV1

natural_language_classifier = NaturalLanguageClassifierV1(
    iam_apikey='{iam-apikey}',
    url='https://gateway.watsonplatform.net/natural-language-classifier/api')

classes = natural_language_classifier.classify(
    '94904ex626-nlc-66', 'história muito legal sobre a cerveja').get_result()
print(classes['top_class'])
from __future__ import print_function
import json
import os

# from os.path import join, dirname
from ibm_watson import NaturalLanguageClassifierV1

# If service instance provides API key authentication
service = NaturalLanguageClassifierV1(
    ## url is optional, and defaults to the URL below. Use the correct URL for your region.
    url='https://gateway.watsonplatform.net/natural-language-classifier/api',
    iam_apikey='YOUR APIKEY')

# service = NaturalLanguageClassifierV1(
#     ## url is optional, and defaults to the URL below. Use the correct URL for your region.
#     # url='https://gateway.watsonplatform.net/natural-language-classifier/api',
#     username='******',
#     password='******')

classifiers = service.list_classifiers().get_result()
print(json.dumps(classifiers, indent=2))

# create a classifier
with open(
        os.path.join(os.path.dirname(__file__),
                     '../resources/weather_data_train.csv'),
        'rb') as training_data:
    metadata = json.dumps({'name': 'my-classifier', 'language': 'en'})
    classifier = service.create_classifier(
        metadata=metadata, training_data=training_data).get_result()
    classifier_id = classifier['classifier_id']
Example #20
import os

from dotenv import load_dotenv
from flask import Flask, render_template, request
from ibm_watson import NaturalLanguageClassifierV1

DEBUG = True
app = Flask(__name__)

load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))

nlc_username = os.environ.get("NATURAL_LANGUAGE_CLASSIFIER_USERNAME")
nlc_password = os.environ.get("NATURAL_LANGUAGE_CLASSIFIER_PASSWORD")
nlc_iam_apikey = os.environ.get("NATURAL_LANGUAGE_CLASSIFIER_IAM_APIKEY")
classifier_id = os.environ.get("CLASSIFIER_ID")

# Use provided credentials from environment or pull from IBM Cloud VCAP
if nlc_iam_apikey != "placeholder":
    NLC_SERVICE = NaturalLanguageClassifierV1(iam_apikey=nlc_iam_apikey)
elif nlc_username != "placeholder":
    NLC_SERVICE = NaturalLanguageClassifierV1(username=nlc_username,
                                              password=nlc_password)
else:
    NLC_SERVICE = NaturalLanguageClassifierV1()


@app.route('/')
def default():
    classifier_info = "cannot detect classifier"
    if NLC_SERVICE:
        classifier_info = "classifier detected, using API: " + NLC_SERVICE.url
    return render_template('index.html',
                           classifier_info=classifier_info,
                           icd_code="",
                           icd_output="",
                           classifier_output="")
Example #21
import json
import os
import ssl

from ibm_watson import NaturalLanguageClassifierV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

authenticator = IAMAuthenticator(
    'yjsnqyHlLV6Hre2gSL9LqtdAaU9hhRBm7Y_f3k8QTS0v')
service = NaturalLanguageClassifierV1(authenticator=authenticator)
service.set_service_url(
    'https://api.us-south.assistant.watson.cloud.ibm.com/instances/20b1e02e-12ff-4ab0-87e6-469aa5868021'
)

classifiers = service.list_classifiers().get_result()
print(json.dumps(classifiers, indent=2))

# create a classifier
with open(
        os.path.join(os.path.dirname(__file__),
                     '../resources/weather_data_train.csv'),
        'rb') as training_data:
    metadata = json.dumps({'name': 'my-classifier', 'language': 'en'})
    classifier = service.create_classifier(
        training_metadata=metadata, training_data=training_data).get_result()
    classifier_id = classifier['classifier_id']
    print(json.dumps(classifier, indent=2))

status = service.get_classifier(classifier_id).get_result()
print(json.dumps(status, indent=2))
import json
from ibm_watson import NaturalLanguageClassifierV1
from ibm_watson import ApiException
import Settings

try:
    natural_language_classifier = NaturalLanguageClassifierV1(
        iam_apikey=Settings.API_KEY, url=Settings.URL)

    with open(Settings.TRAINING_DATA_PATH, 'rb') as training_data:
        with open(Settings.METADATA_PATH, 'rb') as metadata:
            classifier = natural_language_classifier.create_classifier(
                training_data=training_data, metadata=metadata).get_result()
    print(json.dumps(classifier, indent=2))
except ApiException as ex:
    print("Method failed with status code " + str(ex.code) + ": " + ex.message)
'''
Expected response:
{
  "name": "TutorialClassifier",
  "language": "en",
  "status": "Training",
  "url": "https://gateway.watsonplatform.net/natural-language-classifier/api/v1/classifiers/0e6935x475-nlc-2948",
  "classifier_id": "0e6935x475-nlc-2948",
  "created": "2018-12-10T17:42:31.823Z",
  "status_description": "The classifier instance is in its training phase, not yet ready to accept classify requests"
}
'''
Example #23
from __future__ import print_function
import json
import os

# from os.path import join, dirname
from ibm_watson import NaturalLanguageClassifierV1

# If service instance provides API key authentication
service = NaturalLanguageClassifierV1(
    ## url is optional, and defaults to the URL below. Use the correct URL for your region.
    url='https://gateway.watsonplatform.net/natural-language-classifier/api',
    iam_apikey='DBxOesEcwYTQK9-dvcaxTwBICWk0s3RwwEW6m-2eppDn')

# service = NaturalLanguageClassifierV1(
#     ## url is optional, and defaults to the URL below. Use the correct URL for your region.
#     # url='https://gateway.watsonplatform.net/natural-language-classifier/api',
#     username='******',
#     password='******')

classifiers = service.list_classifiers().get_result()
print(json.dumps(classifiers, indent=2))

# create a classifier
with open(
        os.path.join(os.path.dirname(__file__),
                     '../resources/weather_data_train.csv'),
        'rb') as training_data:
    metadata = json.dumps({'name': 'my-classifier', 'language': 'en'})
    classifier = service.create_classifier(
        metadata=metadata, training_data=training_data).get_result()
    classifier_id = classifier['classifier_id']
import json
from ibm_watson import NaturalLanguageClassifierV1

natural_language_classifier = NaturalLanguageClassifierV1(
    iam_apikey='{apikey}', url='{url}')

status = natural_language_classifier.delete_classifier(
    '{classifier_id}').get_result()
print(json.dumps(status, indent=2))
Example #25
def main():
    # Define everything needed later
    inputs = []
    max_btwness = 0.0
    min_btwness = 0.0
    avg_btwness = 0.0

    # Load the classifier
    authenticator = IAMAuthenticator(
        'd61RIxMA4RwhewIoThcevX0xJqAo80mMyAnkjwNb8ePy')
    discovery = DiscoveryV1(version='2018-08-01', authenticator=authenticator)
    discovery.set_service_url(
        'https://gateway.watsonplatform.net/natural-language-classifier/api')
    classifier = NLClassifier(authenticator)

    # Take Input, can input multiple lines
    text_file = open('../data/input.txt')
    for line in text_file:
        line_text = line.split(
            ',', 2)  # maximum 2 splits, because some text contains commas
        n1 = -1
        n2 = -1
        try:
            n1 = int(line_text[0])
            n2 = int(line_text[1])
        except (ValueError, IndexError):
            print("Node id should be an integer")
        # Clean the text
        text = line_text[2]
        for mark in del_marks:
            text = text.replace(mark, '')
        text = text.replace('&#039;', '\'')
        # Store this line if input is valid
        if not (n1 == -1 or n2 == -1):
            input = [n1, n2, text]
            inputs.append(input)
    text_file.close()

    # Get the statistics of the graph
    stat_file = open('../data/stat.txt')
    for line in stat_file:
        if 'max' in line:
            max_btwness = float(line.split(': ')[1])
        if 'min' in line:
            min_btwness = float(line.split(': ')[1])
        if 'avg' in line:
            avg_btwness = float(line.split(': ')[1])
    stat_file.close()

    # Get the betweenness record
    count = 0
    file = open('../data/btwness.txt')
    for line in file:
        line_text = line.split(',')
        n1 = int(line_text[0])
        n2 = int(line_text[1])
        btwness = float(line_text[2])
        # Search for an input that represents the same edge as it appears in
        # the full list, so the full list of edges only has to be iterated
        # once; inputs is much smaller than the list of all edges, so
        # iterating over inputs repeatedly is cheaper than iterating over
        # all edges repeatedly
        for index, input in enumerate(inputs):
            # Append btwness as the 4th feature to each input,
            # matching the edge in either orientation
            if input[0] == n1 and input[1] == n2:
                input.append(btwness)
                count = count + 1  # count how many inputs have the 4th feature
            elif input[0] == n2 and input[1] == n1:
                input.append(btwness)
                count = count + 1
            inputs[index] = input
            # Don't keep iterating if all inputs are processed
            if count == len(inputs):
                break
    file.close()

    for input in inputs:
        # Classify
        result = classifier.classify('90dbdex665-nlc-266',
                                     input[2]).get_result()
        class_name = result['classes'][0]['class_name']
        confidence = result['classes'][0]['confidence']
        # if top class is 'bully'
        if 'bully' in class_name:
            normalized_confidence = (confidence - min_confidence) / (
                max_confidence - min_confidence)
            if len(input) != 4:
                # This means there is no betweenness data appended in the previous step
                # So the edge does not exist, they are strangers, it is purely bullying
                if normalized_confidence > .9:
                    print(input[2] + "bully")
            else:
                btwness = input[3]
                print(f'{confidence}, {normalized_confidence}, {btwness}')
                if normalized_confidence > .9:
                    if btwness > avg_btwness:  # need a better threshold
                        # larger btwness means less significant network
                        print(input[2] + "prob. FP")
                    else:
                        print(input[2] + "bully")
                elif normalized_confidence > .5:
                    if btwness < avg_btwness:  # need a better threshold
                        # smaller btwness means more significant network
                        print(input[2] + "prob. FN")