Example #1
def stop():
    """stop the workers"""
    for worker in workers:
        celery_command("stop", workers[worker]["app"], workers[
                       worker]["hostlist"], workers[worker]["queue"])
    mq.stop()
    clean()
Example #2
def stop():
    """stop the workers"""
    for worker in workers:
        celery_command("stop", workers[worker]["app"],
                       workers[worker]["hostlist"], workers[worker]["queue"])
    mq.stop()
    clean()
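Both examples index a module-level workers mapping with "app", "hostlist", and "queue" keys. A hedged sketch of the shape that mapping presumably has; the worker names and values below are purely illustrative.

# Hypothetical layout of the workers dict that stop() iterates over;
# the key names mirror the lookups above, the values are made-up placeholders.
workers = {
    "ingest": {"app": "proj.celery_app", "hostlist": "host1,host2", "queue": "ingest"},
    "report": {"app": "proj.celery_app", "hostlist": "host3", "queue": "report"},
}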
Example #3
def list_hist(source):
    ''' Build a list-of-lists histogram, e.g. [['hello', 1], ['you', 3], ['sir', 4]].

    Takes text, cleans it into a list of words, and compares each word against
    the rest of the words to keep a running total. The used list ensures each
    word is only counted once.
    '''

    histo = []
    used = []

    text = clean(source)

    # print(text)

    for word in text:
        counter = 0
        if word in used:
            continue

        used.append(word)

        for word2 in text:
            if word == word2:
                counter += 1
            
        instance = [word, counter]
        histo.append(instance)

    # print(histo)
    return histo
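A small usage sketch, assuming clean() splits the source text into a list of lowercase words; the collections.Counter variant at the end gives the same counts in a single pass instead of the nested loop above.

# Hypothetical input; the expected output format follows the docstring example.
source = "hello you you sir you sir sir sir"
print(list_hist(source))  # [['hello', 1], ['you', 3], ['sir', 4]]

# Equivalent single-pass count, assuming clean() returns a list of words.
from collections import Counter
histo = [[word, count] for word, count in Counter(clean(source)).items()]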
Example #4
def newdoc(text):
    doc = (clean(tb(text).lower()))
    dataset = pd.read_csv('cleaned.csv', index_col=0).dropna()
    vector = TfidfVectorizer()

    transformer = vector.fit_transform(dataset['text'])
    pos = dataset['label']

    #nb = MultinomialNB()
    #nb=nb.fit(transformer, pos)

    newdoc_transformed = vector.transform([doc])
    # save the classifier
    #with open('nb_classifier.pkl', 'wb') as fid:
    #    pickle.dump(nb, fid)

    # load MultinomialNB
    with open('nb_classifier.pkl', 'rb') as fid:
        nb_loaded = pickle.load(fid)

    #svm = SGDClassifier(loss='hinge', penalty='l2', alpha = 1e-3, n_iter = 5, random_state = 42)
    #svm = svm.fit(transformer, pos)
    #with open('svm_classifier.pkl', 'wb') as fid:
    #    pickle.dump(nb, fid)

    # load svm
    with open('svm_classifier.pkl', 'rb') as fid:
        svm_loaded = pickle.load(fid)
    pred = nb_loaded.predict(newdoc_transformed)[0]
    pred1 = svm_loaded.predict(newdoc_transformed)[0]
    feature_names = vector.get_feature_names()

    doc = 0
    feature_index = newdoc_transformed[doc, :].nonzero()[1]
    tfidf_scores = zip(feature_index, [newdoc_transformed[doc, x] for x in feature_index])
    sorted_words = sorted(tfidf_scores, key=lambda x: x[1], reverse=True)

    if pred == 1 or pred1 == 1:
        print("It's a positive article")
        num = random.randint(1, 3)
        if num == 1:
            # "Why {} won't leave you indifferent"
            return ("Почему {} не оставит вас равнодушным".format(feature_names[sorted_words[0][0]]))
        if num == 2:
            # "How to beat everyone with {}"
            return ("Как с помощью {} победить всех".format(feature_names[sorted_words[0][0]]))
        if num == 3:
            # "How {} will help you"
            return ("Как {} вам поможет".format(feature_names[sorted_words[0][0]]))
    else:
        print("It's a negative article")
        num = random.randint(1, 3)
        if num == 1:
            # "Why {} makes everything so bad"
            return ("Почему из-за {} так плохо".format(feature_names[sorted_words[0][0]]))
        if num == 2:
            # "Why we keep making the same mistakes with {}"
            return ("Почему мы совершаем одинаковые ошибки с {}".format(feature_names[sorted_words[0][0]]))
        if num == 3:
            # "How to defeat {} in three steps"
            return ("Как победить {} в три этапа".format(feature_names[sorted_words[0][0]]))
Example #5
    def run_virtual_sensor(self, inference_df):
        """Run virtual sensor.

        Args:
            TODO: What input format here? Currently it takes a CSV-file.
            input_data ():

        """

        params = yaml.safe_load(open("params.yaml"))
        classification = params["clean"]["classification"]
        onehot_encode_target = params["clean"]["onehot_encode_target"]
        learning_method = params["train"]["learning_method"]
        input_method = params["scale"]["input"]
        output_method = params["scale"]["output"]
        window_size = params["sequentialize"]["window_size"]
        overlap = params["sequentialize"]["overlap"]

        self._check_features_existence(inference_df)

        df = clean(inference_df=inference_df)
        df = featurize(inference=True, inference_df=df)

        X = np.array(df)

        input_scaler = joblib.load(INPUT_SCALER_PATH)
        output_scaler = joblib.load(OUTPUT_SCALER_PATH)

        if input_method is not None:
            X = input_scaler.transform(X)

        X = split_X_sequences(X, window_size, overlap=overlap)

        if learning_method in NON_DL_METHODS:
            model = load(MODELS_FILE_PATH)
        else:
            model = models.load_model(MODELS_FILE_PATH)

        y_pred = model.predict(X)

        if onehot_encode_target:
            y_pred = np.argmax(y_pred, axis=-1)
        elif classification:
            # np.int was removed in recent NumPy versions; the builtin int works
            y_pred = np.array((y_pred > 0.5), dtype=int)

        print(y_pred)
        # plt.figure()
        # plt.plot(y_pred)
        # plt.show()

        return y_pred
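A hedged sketch of the params.yaml layout this method expects, written as the dict yaml.safe_load() would return; the key names follow the lookups above and the values are illustrative assumptions only.

params = {
    "clean": {"classification": True, "onehot_encode_target": False},
    "train": {"learning_method": "cnn"},
    "scale": {"input": "standard", "output": "standard"},
    "sequentialize": {"window_size": 10, "overlap": 0},
}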
Example #6
from CONSTANT import *
from diablo3_scrapper_utils import *
from clean import *
from get_page import *
from copy_paste import *
from create_database import *
from get_categorie import *
from test_bd import *
import sqlite3

# Un-comment to re-download the pages in case of Diablo updates and item changes
get_page(dir_storage)

# DEBUG: removes the temporary files located in the working directory
clean(".", True)
# Copy the files from an HTML page storage directory to avoid sending too many
# requests to Blizzard's server
copy_from_data_storage(dir_storage)
# Create the database
create_database()

# Open a single connection to the database so the functions do not open and
# close a connection dozens of times
db = sqlite3.connect(DB_NAME)

for element in url_extension:

    # Select only the content between the <tbody> and </tbody> tags;
    # the content between these lines is the detail and the name of the items
    select("<tbody>\n","</tbody>\n", element)
Example #7
    # runs all 6 algorithms
    tune = 'tune' in argv
    # attempt multiple runs with different tuning parameters
    final = 'final' in argv
    # use best algorithm, tuning parameters, and features
    write = 'write' in argv
    # write plots

    if (short + tune + final) > 1:
        print('COULD NOT RUN')
        print('short, tune, and final are incompatible arguments')
    elif (short + tune + final) == 0:
        final = True

    else:

        if read:
            if shard:
                df_clean = pd.read_csv('../data/clean_shard.csv')
            else:
                ints_in_argv = [int(arg) for arg in argv if arg.isdigit()]
                if ints_in_argv:
                    rows = ints_in_argv[0]
                    df_clean = pd.read_csv('../data/clean.csv', nrows=rows)
                else:
                    df_clean = pd.read_csv('../data/clean.csv')
        else:
            df_clean = clean(load())

        model(df_clean, shard, short, tune, final, write)
Example #8
                 details, anthem, urls, faces):
        self.name = name
        self.age = age
        self.college = college
        self.job = job
        self.city = city
        self.gender = gender
        self.distance = distance
        self.details = details
        self.anthem = anthem
        self.urls = urls
        self.faces = faces


df = pd.read_csv('data/raw/profile_data.csv')
df = clean(df)
df = fill_missing_cities(df)
df = add_location_values(df)


def find_face(img):
    img = Image.open(img).convert('RGB')
    img = np.array(img)
    # the image was opened as RGB above, so use the RGB-to-gray conversion
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    faces = facealt_cascade.detectMultiScale(gray, 1.1, 4)

    # print("Found {} faces".format(len(faces)))
    face_arr = []
    for (x, y, w, h) in faces:
        face_arr.append(gray[y:y + h, x:x + w])  # crop the width with w, not h
    # for each in face_arr:
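find_face() relies on a module-level facealt_cascade that is not shown. A minimal sketch of how it is presumably created; the cascade file name is an assumption based on the variable name.

import cv2

# Hypothetical setup; haarcascade_frontalface_alt.xml ships with opencv-python.
facealt_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml')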
Example #9
                        scoring=filtered_accuracy,
                        n_jobs=-1,
                        iid=False,
                        cv=3)

    grid.fit(df_X_known.copy(), y_known)

    unknown_predictions = filtered_predict(grid,
                                           df_unknowns,
                                           filter_train=True,
                                           df_X_train=df_knowns,
                                           y_train=y_knowns)

    end_time(start)


if __name__ == '__main__':

    read = 'read' in argv
    # reads data from .csv

    if read:
        df_knowns = pd.read_csv('../data/clean.csv')
    else:
        df_knowns = clean(load())

    df_unknowns = clean(load_unknowns())
    # NEED load_unknowns() function

    predict(df_knowns, df_unknowns)
Example #10
                    default='instructions.yaml',
                    help='yaml file with data generation instructions')
args = parser.parse_args()

cwd = os.getcwd()

with open(args.instF) as f:
    instr = yaml.load(f, Loader=yaml.FullLoader)

n = instr.get('learnNum')
seg = instr.get('seg')
sN = instr.get('seriesNum')
ST = instr.get('seriesTrain')

for i in range(n):  # run the classification model n times for the given color setup
    start = datetime.datetime.now()
    # create the training data from the instructions file
    executeOrder66(args.instF, cwd)
    # if seg:
    #     saveGraphs()
    #     cleanReg(cwd)
    if ST:
        series_classification(cwd, i)
        cleanSeries(cwd)
    else:
        graph_classification(cwd, i)
        clean(cwd)
    print('generation and classification ' + str(i) + ' elapsed time: ',
          (datetime.datetime.now() - start).total_seconds())
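A hedged sketch of the fields this script reads from instructions.yaml, expressed as the dict yaml.load() would return; the values are placeholders.

instr = {
    "learnNum": 10,       # number of generate-and-classify runs
    "seg": False,         # segmentation flag (only used in the commented-out block)
    "seriesNum": 5,       # number of series
    "seriesTrain": True,  # run series_classification instead of graph_classification
}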
Example #11
    # runs all 6 algorithms
    tune = 'tune' in argv
    # attempt multiple runs with different tuning parameters
    final = 'final' in argv
    # use best algorithm, tuning parameters, and features
    write = 'write' in argv
    # write plots

    if (short + tune + final) > 1:
        print('COULD NOT RUN')
        print('short, tune, and final are incompatible arguments')
    elif (short + tune + final) == 0:
        final = True

    else:

        if read:
            if shard:
                df_clean = pd.read_csv('../data/clean_shard.csv')
            else:
                ints_in_argv = [int(arg) for arg in argv if arg.isdigit()]
                if ints_in_argv:
                    rows = ints_in_argv[0]
                    df_clean = pd.read_csv('../data/clean.csv', nrows=rows)
                else:
                    df_clean = pd.read_csv('../data/clean.csv')
        else:
            df_clean = clean(load())

        model(df_clean, shard, short, tune, final, write)
Example #12
def get_one_ticker(one_ticker_name):
    option_data = Options(one_ticker_name, data_source='yahoo').get_all_data()
    option_data.reset_index(inplace=True)
    option_data.drop('JSON', axis=1, inplace=True)
    r = clean(option_data)
    return r
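A hypothetical call, assuming Options comes from pandas_datareader.data; note that the Yahoo option-chain endpoint it relied on has since been retired, so this may no longer return live data.

# 'AAPL' is just an illustrative ticker.
aapl_options = get_one_ticker('AAPL')
print(aapl_options.head())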
Example #13
def ngrams(input, n):
    input = clean(input)
    output = []
    for i in range(0, len(input) - n + 1):
        output.append(input[i:i + n])  # append the slice of length n
    return output
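A small usage sketch, assuming clean() returns a lowercased string, so the result is a list of character n-grams; with a clean() that returns a word list it would yield word n-grams instead.

print(ngrams("Hello World", 3))  # e.g. ['hel', 'ell', 'llo', ...]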
Example #14
    def clean(self):
        # XXX: this wipes all the data, but when the new database is created
        # the proper indexes are not recreated
        # drop every collection that was created
        dtb = connect(self.db, self.application.config)
        clean(dtb)
Example #15
def convert(code):
    # code += "\n"
    code = "\n" + code + "\n"
    converted = ""

    bold = False
    italic = False
    inlineCode = False
    bigCode = False
    link = {
        "isImage": False,
        "start": 0,
        "end": 0,
        "url": "",
        "titleStart": 0,
        "titleEnd": 0,
        "title": "",
        "urlStart": 0,
        "urlEnd": 0,
        "text": ""
    }
    # lists = []

    i = 1
    while i < len(code) - 1:
        if not code[i - 1] == "\\":
            if code[i] == "\n":
                converted += "<br/>"
            if (within(i, len(code), 3)
                    and code[i] + code[i + 1] + code[i + 2] == '```'):
                i += 2
                # print ("Big Code")
                bigCode = not bigCode
                if bigCode:
                    converted += "<br/><div style='background-color: grey;'><code>"
                else:
                    converted += "</code></div>"
            else:
                if i < len(code) - 4:
                    if code[i] + code[i + 1] + code[i + 2] == "---" or code[
                            i] + code[i + 1] + code[i + 2] == "***":
                        i += 3
                        converted += "<hr>"
                if (code[i] == "_" and not code[i + 1] == "_") or (
                        code[i] == "*" and not code[i + 1] == "*"):
                    italic = not italic
                    i += 1
                    if not italic:
                        converted += "</i>"
                    else:
                        converted += "<i>"
                if code[i] == "`" and not code[i + 1] == "`":
                    inlineCode = not inlineCode
                    i += 1
                    if not inlineCode:
                        converted += "</code></span>"
                    else:
                        converted += "<span style='color: maroon;'><code>"

                if code[i] == "\n":
                    if inlineCode:
                        converted += "</code></span>"
                    inlineCode = False

                if code[i] + code[i + 1] == "**" or code[i] + code[i +
                                                                   1] == "__":
                    i += 1
                    bold = not bold
                    if not bold:
                        converted += "</b>"
                    else:
                        converted += "<b>"

                else:
                    if getLine(code, i).split()[0][0] == "#":

                        headernum = getLine(code, i).split()[0].count("#")
                        tmp = "<h" + str(headernum) + ">" + getLine(
                            code, i)[headernum:].strip() + "</h" + str(
                                headernum) + ">"
                        converted += tmp

                        i += len(getLine(code, i))

                    if getLine(code, i).split()[0][0] == ">":
                        # tmp =
                        converted += "<br><span style='color: grey; background-color: grey;'>|</span>" + (
                            " " + convertBasic(getLine(code, i).strip()))
                        i += (len(getLine(code, i)))

                    if getLine(code, i).split()[0][0:2] == "-" or getLine(
                            code, i).split()[0][0:2] == "*":
                        tmp = convertBasic(getLine(code, i).strip())

                        if getLine(code, i).split()[0][0:2] == "*":
                            converted = converted[:-3]

                        converted += "<ul><li>" + (" " + tmp) + "</li></ul>"
                        # print (converted)
                        i += len(getLine(code, i))

                    if code[i] == "[":
                        link["start"] = i
                        link["isImage"] = code[i - 1] == "!"

                    if code[i] == "]":
                        link["end"] = i
                        link["text"] = code[link["start"] + 1:link["end"]]
                        # print (link["text"])

                    if code[i] == "\"":
                        # if not link["start"] == 0 and link["end"] == 0:
                        if not link["titleStart"] == 0:
                            link["titleEnd"] = i
                            link["title"] = code[link["titleStart"] +
                                                 1:link["titleEnd"]]
                            # print (link["title"])
                        else:
                            link["titleStart"] = i

                    if code[i] == "(":
                        if link["start"]:
                            link["urlStart"] = i
                    if code[i] == ")":
                        if link["start"]:
                            link["urlEnd"] = i
                            link["url"] = code[link["urlStart"] +
                                               1:link["urlEnd"] -
                                               (len(link["title"]))]
                            if link["title"]:
                                link["url"] = link["url"][:-3]
                            # print (link["url"])

                            converted = converted[:0 -
                                                  (len(code[link["start"]:i]))]
                            i += 1
                            converted = converted[:-1]
                            converted += processLink(link)
                            link = {
                                "start": 0,
                                "end": 0,
                                "url": "",
                                "titleStart": 0,
                                "titleEnd": 0,
                                "title": "",
                                "urlStart": 0,
                                "urlEnd": 0,
                                "text": ""
                            }

                    converted += code[i]
        else:
            converted = converted[:-1]
            converted += code[i]

        i += 1
    return "<html>" + clean(converted) + "</html>"
Example #16
from CONSTANT import *
from diablo3_scrapper_utils import *
from clean import *
from get_page import *
from copy_paste import *
from create_database import *
from get_categorie import *
from test_bd import *
import sqlite3

# Un-comment to re-download the pages in case of Diablo updates and item changes
get_page(dir_storage)

# DEBUG: removes the temporary files located in the working directory
clean(".", True)
# Copy the files from an HTML page storage directory to avoid sending too many
# requests to Blizzard's server
copy_from_data_storage(dir_storage)
# Create the database
create_database()

# Open a single connection to the database so the functions do not open and
# close a connection dozens of times
db = sqlite3.connect(DB_NAME)

for element in url_extension:

    # Select only the content between the <tbody> and </tbody> tags;
    # the content between these lines is the detail and the name of the items
    select("<tbody>\n", "</tbody>\n", element)