def get_data():
    all_files = os.listdir("../Image_CNN/images")[:Static.limit]
    ids = list(map(lambda x: x[:-4], all_files))
    # for id in ids:
    # value = Static.facebookDb.getRow(id)
    # print(id, ": ", value)
    rows = list(
        map(lambda x: x[0], filter(None, map(lambda x: Static.facebookDb.getRow(x), ids))))
    data = list(map(lambda x: (x[0], x[10], x[2], x[3]), rows))
    messages = list(map(lambda x: x[1], data))
    # share_counts = list(map(lambda x: x[2], data))
    comment_counts = list(map(lambda x: x[3], data))
    image_data = transform_image_data(all_files)
    message_data = to_vector(messages)
    # Clamp negatives to zero; assumes metric_getter returns a value for every id so
    # y_data stays aligned with image_data and message_data when zipped below.
    y_data = list(map(lambda x: x if x > 0 else 0,
                      filter(lambda x: x is not None, map(lambda x: Static.metric_getter(x), ids))))
    combined_data = zip(image_data, message_data, y_data)
    import statistics
    print("Data Var: ", statistics.stdev(y_data) ** 2)
    for data_chunk in group(combined_data, Static.group_size):
        image_data_batch, message_data_batch, y_data_batch = zip(*data_chunk)
        (trainX_image, testX_image, trainY_image, testY_image) = train_test_split(image_data_batch, y_data_batch,
                                                                                  test_size=0.25, random_state=42)
        (trainX_message, testX_message, trainY_message, testY_message) = train_test_split(message_data_batch,
                                                                                          y_data_batch, test_size=0.25,
                                                                                          random_state=42)
        assert trainY_image == trainY_message
        assert testY_image == testY_message
        yield trainX_image, trainX_message, trainY_image, testX_image, testX_message, testY_image
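
# The generator above relies on a `group()` helper that is not shown in this excerpt.
# A minimal sketch, assuming it simply yields fixed-size chunks from any iterable
# (an assumed stand-in, not the project's original implementation):
from itertools import islice


def group(iterable, size):
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, size))
        if not chunk:
            return
        yield chunk
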
def get_data():
    all_files = os.listdir("../Image_CNN/images")[:Static.limit]
    print("Size of all files: {0}".format(len(all_files)))
    ids = list(map(lambda x: x[:-4], all_files))
    # for id in ids:
    # value = Static.facebookDb.getRow(id)
    # print(id, ": ", value)
    rows = list(
        map(lambda x: x[0], filter(None, map(lambda x: Static.facebookDb.getRow(x), ids))))
    data = list(map(lambda x: (x[0], x[10], x[2], x[3]), rows))
    messages = list(map(lambda x: x[1], data))
    # share_counts = list(map(lambda x: x[2], data))
    comment_counts = list(map(lambda x: x[3], data))
    image_data = transform_image_data(all_files)
    message_data = to_vector(messages)
    y_data = list(map(lambda x: Static.metric_getter(x), ids))
    combined_data = zip(image_data, message_data, y_data)
    image_data_batch, message_data_batch, y_data_batch = zip(*combined_data)
    (trainX_image, testX_image, trainY_image, testY_image) = train_test_split(image_data_batch, y_data_batch,
                                                                              test_size=0.25, random_state=42)
    (trainX_message, testX_message, trainY_message, testY_message) = train_test_split(message_data_batch,
                                                                                      y_data_batch, test_size=0.25,
                                                                                      random_state=42)
    assert trainY_image == trainY_message
    assert testY_image == testY_message
    return trainX_image, trainX_message, trainY_image, testX_image, testX_message, testY_image
def get_data():
    all_files = os.listdir("../Image_CNN/images")[:Static.limit]
    ids = list(map(lambda x: x[:-4], all_files))
    rows = list(
        map(lambda x: x[0], filter(None, map(lambda x: Static.facebookDb.getRow(x), ids))))
    data = list(map(lambda x: (x[0], x[10], x[2], x[3]), rows))
    messages = list(map(lambda x: x[1], data))
    image_data = transform_image_data(all_files)
    message_data = to_vector(messages)
    y_data = list(map(lambda x: x if x > 0 else 0,
                      filter(lambda x: x is not None, map(lambda x: Static.metric_getter(x), ids))))
    combined_data = list(zip(image_data, message_data, y_data))  # materialise: zip is single-use
    if Static.group_size is None:
        Static.group_size = len(combined_data)
    import statistics
    print("Data Var: ", statistics.stdev(y_data) ** 2)
    print(y_data)
    image_data_batch, message_data_batch, y_data_batch = zip(*combined_data)
    (trainX_image, testX_image, trainY_image, testY_image) = train_test_split(image_data_batch, y_data_batch,
                                                                              test_size=0.1, random_state=42)
    (trainX_message, testX_message, trainY_message, testY_message) = train_test_split(message_data_batch,
                                                                                      y_data_batch, test_size=0.1,
                                                                                      random_state=42)
    assert trainY_image == trainY_message
    assert testY_image == testY_message
    return trainX_image, trainX_message, trainY_image, testX_image, testX_message, testY_image
def get_data():
    sentiment_post_ids = list(map(lambda x: x[0], Static.facebookDb.getImageIdWithPositiveCommentCounts()))
    all_dir_files = os.listdir("../Image_CNN/images")

    all_files = []
    for post_id in sentiment_post_ids:
        for file_name in all_dir_files:
            if post_id in file_name:
                all_files.append(file_name)
    print("Number of files in analysis: {0}".format(len(all_files)))
    print(all_files)
    ids = list(map(lambda x: x[:-4], all_files))
    rows = list(
        map(lambda x: x[0], filter(None, map(lambda x: Static.facebookDb.getRow(x), ids))))
    data = list(map(lambda x: (x[0], x[10], x[2], x[3]), rows))
    messages = list(map(lambda x: x[1], data))
    image_data = transform_image_data(all_files)
    message_data = to_vector(messages)
    y_data = list(filter(lambda x: x is not None, map(lambda x: Static.metric_getter(x), ids)))
    combined_data = list(zip(image_data, message_data, y_data))  # materialise: zip is single-use
    if Static.group_size is None:
        Static.group_size = len(combined_data) - 10000
    import statistics
    print("Data Var: ", statistics.stdev(y_data) ** 2)
    image_data_batch, message_data_batch, y_data_batch = zip(*combined_data)
    (trainX_image, testX_image, trainY_image, testY_image) = train_test_split(image_data_batch, y_data_batch,
                                                                              test_size=0.25, random_state=42)
    (trainX_message, testX_message, trainY_message, testY_message) = train_test_split(message_data_batch,
                                                                                      y_data_batch, test_size=0.25,
                                                                                      random_state=42)
    assert trainY_image == trainY_message
    assert testY_image == testY_message
    return trainX_image, trainX_message, trainY_image, testX_image, testX_message, testY_image
def get_data():
    sentiment_post_ids = list(map(lambda x: x[0], Static.facebookDb.getImageIdWithPositiveCommentCounts()))
    all_files = os.listdir("../Image_CNN/images")[:Static.limit]

    print("Number of files in analysis: {0}".format(len(all_files)))
    ids = list(map(lambda x: x[:-4], all_files))
    # for id in ids:
    # value = Static.facebookDb.getRow(id)
    # print(id, ": ", value)
    rows = list(
        map(lambda x: x[0], filter(None, map(lambda x: Static.facebookDb.getRow(x), ids))))
    data = list(map(lambda x: (x[0], x[10], x[2], x[3]), rows))
    messages = list(map(lambda x: x[1], data))
    # share_counts = list(map(lambda x: x[2], data))
    comment_counts = list(map(lambda x: x[3], data))
    image_data = transform_image_data(all_files)
    message_data = to_vector(messages)
    y_data = list(map(lambda x: x if x > 0 else 0,
                      filter(lambda x: x is not None, map(lambda x: Static.metric_getter(x), ids))))
    combined_data = zip(image_data, message_data, y_data)
    import statistics
    print("Data Var: ", statistics.stdev(y_data) ** 2)
    image_data_batch, message_data_batch, y_data_batch = zip(*combined_data)
    (trainX_image, testX_image, trainY_image, testY_image) = train_test_split(image_data_batch, y_data_batch,
                                                                              test_size=0.25, random_state=42)
    (trainX_message, testX_message, trainY_message, testY_message) = train_test_split(message_data_batch,
                                                                                      y_data_batch, test_size=0.25,
                                                                                      random_state=42)
    assert trainY_image == trainY_message
    assert testY_image == testY_message
    return trainX_image, trainX_message, trainY_image, testX_image, testX_message, testY_image
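
# A hedged usage sketch (not part of the original excerpt): how the six-tuple returned
# above might be consumed. The numpy conversion and the commented model.fit call are
# assumptions about the surrounding training code.
import numpy as np

trainX_image, trainX_message, trainY, testX_image, testX_message, testY = get_data()

trainX_image = np.asarray(trainX_image)
trainX_message = np.asarray(trainX_message)
trainY = np.asarray(trainY)

# A two-input Keras model would then be trained roughly like this (model definition omitted):
# model.fit([trainX_image, trainX_message], trainY,
#           validation_data=([np.asarray(testX_image), np.asarray(testX_message)],
#                            np.asarray(testY)),
#           epochs=10, batch_size=32)
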
if "Comment" in Global.metric_getter.__name__:  # opening condition reconstructed to mirror the elif below
    Global.metric_name = "commentCount"
elif "Sentiment" in Global.metric_getter.__name__:
    Global.metric_name = "postPositivity"
else:
    raise Exception("Did not get the right metric name")

postTexts = []

for post in MessageGetter.get_post_generator():
    pt = PostText(post)
    postTexts.append(pt.getValues())

df = pd.DataFrame.from_records(postTexts, columns=MessageGetter.get_columns())
df = df[df[Global.metric_name] > 0]  # FILTER OUT ZEROS FOR NOW
x_data = to_vector(df["message"])
y_data = list(map(lambda x: x if x > 0 else 0, df[Global.metric_name]))
print(len(x_data))
print(len(y_data))

model = Sequential()


def create_model():
    input_layer_count = 1  # y_data.shape[1]
    model.add(Dense(200, input_dim=x_data.shape[1]))
    model.add(Dropout(0.3))

    model.add(Dense(100, input_dim=x_data.shape[1]))
    model.add(Dropout(0.3))