예제 #1
0
def read_csv(filename = './train.csv'):
    """Load a two-column CSV file into text and integer-label arrays.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(X, Y)``: ``X`` is a NumPy array built from the first
        column of every row and ``Y`` is a NumPy integer array built from
        the second column.

    Raises:
        IndexError: If a row has fewer than two columns.
    """
    phrase = []
    emoji = []

    with open(filename) as csvDataFile:
        for row in csv.reader(csvDataFile):
            phrase.append(row[0])
            emoji.append(row[1])

    # Hand the collected columns back to the caller; without a return the
    # function would silently discard everything it read.
    return np.asarray(phrase), np.asarray(emoji, dtype=int)
예제 #2
0
def read_csv(filename='data/emojify_data.csv'):
    """Read a CSV file and split its first two columns into arrays.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        ``(X, Y)`` where ``X`` is a NumPy array of the first-column values
        and ``Y`` is a NumPy integer array of the second-column values.

    Raises:
        IndexError: If a row has fewer than two columns.
    """
    with open(filename) as handle:
        # Pull every row into memory while the file is still open.
        records = list(csv.reader(handle))

    texts = [record[0] for record in records]
    labels = [record[1] for record in records]
    return np.asarray(texts), np.asarray(labels, dtype=int)
def read_csv(filename):
    """Load (text, label) pairs from a CSV file.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple of two NumPy arrays: the first-column values, and the
        second-column values converted to integers.

    Raises:
        IndexError: If a row has fewer than two columns.
    """
    with open(filename) as source:
        pairs = [(line[0], line[1]) for line in csv.reader(source)]

    texts = [text for text, _ in pairs]
    labels = [label for _, label in pairs]
    return np.asarray(texts), np.asarray(labels, dtype=int)
예제 #4
0
def read_csv(filename):
    """Read a dataset file and return its text and label columns.

    Args:
        filename: Path of the CSV dataset file.

    Returns:
        ``(X, Y)``: ``X`` is a NumPy array of the first column and ``Y``
        a NumPy integer array of the second column.

    Raises:
        IndexError: If a row has fewer than two columns.
    """
    texts, labels = [], []

    with open(filename) as handle:
        for record in csv.reader(handle):
            text, label = record[0], record[1]
            texts.append(text)
            labels.append(label)

    return np.asarray(texts), np.asarray(labels, dtype=int)
예제 #5
0
def read_csv(filename = 'data/emojify_data.csv'):
    """Read a UTF-8 encoded CSV file into text and label arrays.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        ``(X, Y)``: ``X`` is a NumPy array of the first-column values and
        ``Y`` a NumPy integer array of the second-column values.

    Raises:
        IndexError: If a row has fewer than two columns.
    """
    # columns[0] collects the texts, columns[1] the labels.
    columns = ([], [])

    with open(filename, encoding='utf-8') as handle:
        for record in csv.reader(handle):
            for index, column in enumerate(columns):
                column.append(record[index])

    texts, labels = columns
    return np.asarray(texts), np.asarray(labels, dtype=int)
예제 #6
0
def read_csv(filename='F:\\deeplearning-data\\word2vec-data\\train_emoji.csv'):
    """Read a CSV file and return its first two columns as arrays.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        ``(X, Y)``: ``X`` is a NumPy array of the first-column values and
        ``Y`` a NumPy integer array of the second-column values.

    Raises:
        IndexError: If a row has fewer than two columns.
    """
    with open(filename) as handle:
        records = list(csv.reader(handle))

    def column(index):
        # Collect one field position across all rows.
        return [record[index] for record in records]

    return np.asarray(column(0)), np.asarray(column(1), dtype=int)
예제 #7
0
def read_csv(filename = 'data/emojify_data.csv'):
    """Read a CSV file and return its first two columns as arrays.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        ``(X, Y)``: ``X`` is a NumPy array of the first-column values and
        ``Y`` a NumPy integer array of the second-column values.

    Raises:
        IndexError: If a row has fewer than two columns.
    """
    with open(filename) as handle:
        pairs = [(record[0], record[1]) for record in csv.reader(handle)]

    # zip(*pairs) cannot be unpacked into two names when there are no
    # rows, so fall back to two empty sequences in that case.
    texts, labels = zip(*pairs) if pairs else ((), ())

    X = np.asarray(texts)
    Y = np.asarray(labels, dtype=int)
    return X, Y
예제 #8
0
def load_csv(filename):
    """Load sentences and their emoji labels from a CSV file.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        ``(X, Y)``: ``X`` is a NumPy array of the first-column values and
        ``Y`` a NumPy integer array of the second-column values.

    Raises:
        IndexError: If a row has fewer than two columns.
    """
    # Read all rows first, then split them into the two columns.
    with open(filename) as handle:
        records = list(csv.reader(handle))

    sentences = list(map(lambda record: record[0], records))
    labels = list(map(lambda record: record[1], records))

    return np.asarray(sentences), np.asarray(labels, dtype=int)
예제 #9
0
def read_csv(filename='data/emojify_data.csv'):
    """Read a CSV file located under the configured static path.

    The path that is opened is the plain string concatenation
    ``settings.BASE_DIR + settings.STATIC_URL + filename``.

    Args:
        filename: File path appended to the configured prefix.

    Returns:
        ``(X, Y)``: ``X`` is a NumPy array of the first-column values and
        ``Y`` a NumPy integer array of the second-column values.

    Raises:
        IndexError: If a row has fewer than two columns.
    """
    full_path = settings.BASE_DIR + settings.STATIC_URL + filename

    with open(full_path) as handle:
        records = list(csv.reader(handle))

    texts = [record[0] for record in records]
    labels = [record[1] for record in records]
    return np.asarray(texts), np.asarray(labels, dtype=int)
예제 #10
0
def read_csv(filename='data/emojify_data.csv'):
    """Read a CSV file and return its first two columns as arrays.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        ``(X, Y)``: ``X`` is a NumPy array of the first-column values and
        ``Y`` a NumPy integer array of the second-column values.

    Raises:
        IndexError: If a row has fewer than two columns.
    """
    phrase = []
    emoji = []

    with open(filename) as csvDataFile:
        csvReader = csv.reader(csvDataFile)

        for row in csvReader:
            phrase.append(row[0])
            emoji.append(row[1])

    X = np.asarray(phrase)
    Y = np.asarray(emoji, dtype=int)

    return X, Y
예제 #11
0
def read_csv_ht(filename = 'data/emojify_data.csv', rte = False):
    """Read a three-column UTF-8 CSV file into two text arrays and labels.

    Args:
        filename: Path of the CSV file to read.
        rte: Accepted but not used by this function.

    Returns:
        ``(X_h, X_t, Y)``: NumPy arrays of the first and second columns,
        and a NumPy integer array of the third column.

    Raises:
        IndexError: If a row has fewer than three columns.
    """
    with open(filename, encoding='utf8') as handle:
        records = list(csv.reader(handle))

    first_texts = [record[0] for record in records]
    second_texts = [record[1] for record in records]
    labels = [record[2] for record in records]

    return (
        np.asarray(first_texts),
        np.asarray(second_texts),
        np.asarray(labels, dtype=int),
    )
예제 #12
0
def read_csv(filename='data/SD_dataset_FINAL.csv'):
    """Read a CSV file, clean each row's text, and return text and labels.

    For every row, all fields except the last are concatenated (with no
    separator) into one string. That string has commas replaced by colons,
    is passed through ``contractions.fix``, has the characters matched by
    the pattern below padded with spaces, and is finally passed through
    ``remove_special_characters(..., remove_digits=True)``. The last field
    of the row is used as the label.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        ``(X, Y)``: ``X`` is a NumPy array of the cleaned strings and ``Y``
        a NumPy integer array of the last-column values.
    """
    # NOTE(review): inside the character class, `(-)` is parsed as the
    # range from '(' to ')', so a literal '-' is not matched -- confirm
    # that this is intended.
    special_char_pattern = re.compile(r'([{.(-)!}])')
    texts, labels = [], []

    with open(filename) as handle:
        for record in csv.reader(handle):
            text = "".join(record[:-1])
            text = text.replace(',', ':')
            text = contractions.fix(text)
            text = special_char_pattern.sub(" \\1 ", text)
            text = remove_special_characters(text, remove_digits=True)
            texts.append(text)
            labels.append(record[-1])

    return np.asarray(texts), np.asarray(labels, dtype=int)
예제 #13
0
def read_csv(filename = 'data/emojify_data.csv', rte = False):
    """Read a UTF-8 CSV file into a text array and an integer label array.

    Args:
        filename: Path of the CSV file to read.
        rte: When truthy, the first two columns are joined with a single
            space to form the text and the third column is the label.
            Otherwise the first column is the text and the second column
            is the label.

    Returns:
        ``(X, Y)``: ``X`` is a NumPy array of the texts and ``Y`` a NumPy
        integer array of the labels.

    Raises:
        IndexError: If a row lacks the column used as the label (or, when
            ``rte`` is falsy, the text column).
    """
    texts, labels = [], []

    with open(filename, encoding='utf8') as handle:
        for record in csv.reader(handle):
            text = " ".join(record[0:2]) if rte else record[0]
            label = record[2] if rte else record[1]
            texts.append(text)
            labels.append(label)

    return np.asarray(texts), np.asarray(labels, dtype=int)
예제 #14
0
# Collect, per state, the emoji found in each document's text.
# The query requests only the "text" and "place.full_name" fields and
# excludes "_id".
# NOTE(review): looks like a MongoDB-style find(filter, projection) call --
# confirm against where `collection` is defined.
content = collection.find({},{"text":1,"place.full_name":1, "_id":0})


# Extra task: maps a state string to the emoji accumulated for it.
stateEmojiCount = {}


for words in content:
    # Emoji found in this document's text.
    emoji = []

    # Assumes words["text"] is a str, so `t` is one character at a time --
    # TODO confirm.
    for t in words["text"]:
        if t in UNICODE_EMOJI:
            emoji.append(t);

    # Presumably a document with two keys has both "text" and "place" --
    # verify against the query result.
    if len(words) == 2:
        address = words['place']['full_name'].split(", ")
        if len(address)== 2:
            city = address[0]
            state = address[1]

        else:
            state = ''

    # NOTE(review): `state` is only assigned inside the branch above, so when
    # len(words) != 2 it keeps the value from an earlier iteration (or is
    # undefined on the first one) -- confirm this is intended.
    # Presumably a two-character state is a state abbreviation -- TODO confirm.
    if len(state) == 2:
        # Extra task: extend this state's entry with the emoji found above.
        if state in stateEmojiCount:
            stateEmojiCount[state] += emoji
        else:
예제 #15
0
stateTweets = {}

#count the tweet used in city in California
cityCount = {}

# #for extra
# stateEmojiCount = {}

for words in content:
    # print(words)
    # emoji list
    emoji = []

    for t in words["text"]:
        if t in UNICODE_EMOJI:
            emoji.append(t)

    for symbol in emoji:
        if symbol in emojiCount:
            emojiCount[symbol] += 1
        else:
            emojiCount[symbol] = 1

    if len(words) == 2:
        address = words['place']['full_name'].split(", ")
        if len(address) == 2:
            city = address[0]
            # print(address[0])
            state = address[1]
            # print(address[1])
        else: