def fct():
    """Create the ``dfp_perso_tw`` index and bulk-load Twitter personal data.

    Connects to Elasticsearch on localhost:9200, hands the index name and
    schema to ``c.createIndex``, then passes whatever ``bulkJsonData`` returns
    for ``personal_data_fixed.json`` to ``helpers.bulk``.

    The load is best-effort: a failure is reported on stdout (with its cause)
    and the function carries on instead of raising.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # The schema forces specific field types; it is adapted to the content of
    # the files being loaded.
    schema = {
        "settings": {
            "analysis": {
                "analyzer": {
                    "my_english_analyzer": {
                        "type": "standard",
                        "stopwords": "_english_",
                    },
                },
            },
        },
        "mappings": {
            "properties": {
                "source": {
                    "type": "text",
                    "analyzer": "my_english_analyzer",
                    "fields": {"keyword": {"type": "keyword"}},
                    "fielddata": True,
                },
                "term": {
                    "type": "text",
                    "analyzer": "my_english_analyzer",
                    "fields": {"keyword": {"type": "keyword"}},
                    "fielddata": True,
                },
                "key": {
                    "type": "text",
                    "analyzer": "my_english_analyzer",
                    "fields": {"keyword": {"type": "keyword"}},
                    "fielddata": True,
                },
            },
        },
    }

    # Create index with a schema
    c.createIndex('dfp_perso_tw', schema, elastic)

    input_folder = dirpath + "/script/dataSource/json-twitter_data"
    for load_type in ["personal_data_fixed"]:
        json_path = os.path.join(input_folder, load_type + '.json')
        try:
            helpers.bulk(
                elastic, bulkJsonData(json_path, "dfp_perso_tw", load_type))
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C and SystemExit
            # still propagate, and show the cause instead of hiding it.
            print("Error in Twitter : {} ({!r})".format(json_path, err))
        else:
            print("Insert Twitter Personal Data")
예제 #2
0
def fct():
    """Create the ``dfp_fb_vote`` index and bulk-load Facebook poll votes.

    Connects to Elasticsearch on localhost:9200, hands the index name and
    schema to ``c.createIndex``, then passes whatever ``bulkJsonData`` returns
    for ``polls_you_voted_on.json`` to ``helpers.bulk``.

    The load is best-effort: a failure is reported on stdout (with the file
    and its cause) and the function carries on instead of raising.
    """
    # create a new instance of the Elasticsearch client class
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # The schema forces "timestamp" to be a date and exposes it under the
    # alias "created_at"; it is adapted to the content of the files loaded.
    schema = {
        "mappings": {
            "properties": {
                "timestamp": {
                    "type": "date",
                    "format": "date_optional_time||epoch_second",
                },
                "created_at": {
                    "type": "alias",
                    "path": "timestamp",
                },
            },
        },
    }

    # Create index with a schema
    c.createIndex('dfp_fb_vote', schema, elastic)

    input_folder = (
        dirpath + "/script/dataSource/json-facebook_data/other_activity")
    for load_type in ["polls_you_voted_on"]:
        json_path = os.path.join(input_folder, load_type + '.json')
        try:
            helpers.bulk(
                elastic, bulkJsonData(json_path, "dfp_fb_vote", load_type))
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C and SystemExit
            # still propagate, and say which file failed and why.
            print("Error in Facebook votes : {} ({!r})".format(json_path, err))
        else:
            print("Insert Facebook Votes")
def fct():
    """Create the ``dfp_text_tw_tweet`` index and bulk-load the tweet file.

    Connects to Elasticsearch on localhost:9200, hands the index name and
    schema to ``c.createIndex``, then passes whatever ``bulkJsonData`` returns
    for ``tweet.json`` to ``helpers.bulk``. Errors are not caught here.
    """
    client = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # English stop-word analyzer referenced by the text mapping below.
    index_settings = {
        "analysis": {
            "analyzer": {
                "my_english_analyzer": {
                    "type": "standard",
                    "stopwords": "_english_",
                },
            },
        },
    }

    # Field mappings: forces specific types and declares an alias; adapted
    # to the content of the files being loaded.
    field_mappings = {
        "created_at": {
            "type": "date",
            "format": "EEE MMM dd HH:mm:ss ZZ yyyy||date_optional_time||epoch_second",
        },
        "full_text": {
            "type": "text",
            "analyzer": "my_english_analyzer",
            "fields": {"keyword": {"type": "keyword"}},
            "fielddata": True,
        },
        "all_text": {"type": "alias", "path": "full_text"},
    }
    # The three sentiment scores share the same float mapping.
    for score_field in ("mySentiment", "sentPositive", "sentNegative"):
        field_mappings[score_field] = {"type": "float"}

    schema = {
        "settings": index_settings,
        "mappings": {"properties": field_mappings},
    }

    # Create index with a schema
    c.createIndex('dfp_text_tw_tweet', schema, client)

    source_dir = dirpath + "/script/dataSource/json-twitter_data"
    for dataset in ["tweet"]:
        json_path = os.path.join(source_dir, dataset + '.json')
        actions = bulkJsonData(json_path, "dfp_text_tw_tweet", dataset)
        helpers.bulk(client, actions)
        print("Insert Twitter Tweets")
def fct():
    """Create the ``dfp_fb_likes`` index and bulk-load Facebook likes/reactions.

    Connects to Elasticsearch on localhost:9200, hands the index name and
    schema to ``c.createIndex``, then, for each of the three JSON files in the
    ``likes_and_reactions`` folder, passes whatever ``bulkJsonData`` returns
    to ``helpers.bulk``.

    Each file is loaded best-effort: a failure is reported on stdout (with
    its cause) and the remaining files are still attempted.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # The schema forces "timestamp" to be a date and exposes it under the
    # alias "created_at"; it is adapted to the content of the files loaded.
    schema = {
        "mappings": {
            "properties": {
                "timestamp": {
                    "type": "date",
                    "format": "date_optional_time||epoch_second",
                },
                "created_at": {
                    "type": "alias",
                    "path": "timestamp",
                },
            },
        },
    }

    # Create index with a schema
    c.createIndex('dfp_fb_likes', schema, elastic)

    input_folder = (
        dirpath + "/script/dataSource/json-facebook_data/likes_and_reactions")
    for load_type in ["likes_on_external_sites", "pages", "posts_and_comments"]:
        json_path = os.path.join(input_folder, load_type + '.json')
        try:
            helpers.bulk(
                elastic, bulkJsonData(json_path, "dfp_fb_likes", load_type))
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C and SystemExit
            # still propagate, and show the cause instead of hiding it.
            print("Error in Facebook :{} ({!r})".format(json_path, err))
        else:
            # This loader handles likes, not friends; the previous message
            # ("Insert Facebook Friends") was a copy-paste leftover.
            print("Insert Facebook Likes")
예제 #5
0
def fct():
    """Create the ``dfp_people_tw_follow`` index and load followers/following.

    Connects to Elasticsearch on localhost:9200, hands the index name and
    schema to ``c.createIndex``, then, for ``follower.json`` and
    ``following.json``, passes whatever ``bulkJsonData`` returns to
    ``helpers.bulk``.

    Each file is loaded best-effort: a failure is reported on stdout (with
    its cause) and the other file is still attempted.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # The schema forces both identifying fields to be exact-match keywords;
    # it is adapted to the content of the files being loaded.
    schema = {
        "mappings": {
            "properties": {
                "accountId": {"type": "keyword"},
                "userLink": {"type": "keyword"},
            },
        },
    }

    # Create index with a schema
    c.createIndex('dfp_people_tw_follow', schema, elastic)

    input_folder = dirpath + "/script/dataSource/json-twitter_data"
    for load_type in ["follower", "following"]:
        json_path = os.path.join(input_folder, load_type + '.json')
        try:
            helpers.bulk(
                elastic,
                bulkJsonData(json_path, "dfp_people_tw_follow", load_type))
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C and SystemExit
            # still propagate, and show the cause instead of hiding it.
            print("Error in Insert Twitter : {} ({!r})".format(json_path, err))
        else:
            print("Insert Twitter follower and following")
예제 #6
0
def fct():
    """Create the ``dfp_people_li_connections`` index and load LinkedIn connections.

    Connects to Elasticsearch on localhost:9200, hands the index name and
    schema to ``c.createIndex``, then passes whatever ``bulkJsonData`` returns
    for ``Connections.json`` to ``helpers.bulk``. Errors are not caught here;
    the confirmation message is printed once, after the loop.
    """
    client = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # "Connected On" is parsed as a date and also exposed through the
    # "created_at" alias; the schema is adapted to the files being loaded.
    field_mappings = {
        "Connected On": {"type": "date", "format": "dd MMM yyyy"},
        "created_at": {"type": "alias", "path": "Connected On"},
    }
    schema = {"mappings": {"properties": field_mappings}}

    # Create index with a schema
    c.createIndex('dfp_people_li_connections', schema, client)

    source_dir = dirpath + "/script/dataSource/json-LinkedIn_data"
    for dataset in ["Connections"]:
        json_path = os.path.join(source_dir, dataset + '.json')
        actions = bulkJsonData(json_path, "dfp_people_li_connections", dataset)
        helpers.bulk(client, actions)

    print("Insert LinkedIn Friends")
def fct():
    """Create the ``dfp_sentiment`` index and load every ``*sentiment.json`` file.

    Connects to Elasticsearch on localhost:9200, hands the index name and
    schema to ``c.createIndex``, then walks ``<dirpath>/vegaFiles/`` and, for
    each file whose name ends with ``sentiment.json``, passes whatever
    ``bulkJsonData`` returns to ``helpers.bulk``. The part of the file name
    before the first dot is given to ``bulkJsonData`` as its third argument.

    Each file is loaded best-effort: a failure is reported on stdout (with
    the file and its cause) and the remaining files are still attempted.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # The schema forces the date and the three sentiment scores to specific
    # types; it is adapted to the content of the files being loaded.
    schema = {
        "mappings": {
            "properties": {
                "date": {
                    "type": "date",
                    "format": "EEE MMM dd HH:mm:ss ZZ yyyy||date_optional_time||epoch_second",
                },
                "mySentiment": {"type": "float"},
                "sentPositive": {"type": "float"},
                "sentNegative": {"type": "float"},
            },
        },
    }

    # Create index with a schema
    c.createIndex('dfp_sentiment', schema, elastic)

    input_folder = dirpath + '/vegaFiles/'

    for root, _dirs, filenames in os.walk(input_folder):
        for filename in filenames:
            if not filename.endswith("sentiment.json"):
                continue
            # Join with the directory currently being walked, not the top
            # folder, so files found in sub-directories resolve correctly.
            json_path = os.path.join(root, filename)
            file_part = filename.split(".")[0]
            try:
                helpers.bulk(
                    elastic,
                    bulkJsonData(json_path, "dfp_sentiment", file_part))
            except Exception as err:
                # Catch Exception (not a bare except) so Ctrl-C and
                # SystemExit still propagate, and say what failed and why.
                print("Error in Insert Sentiment : {} ({!r})".format(
                    json_path, err))
            else:
                print("Insert Sentiment")
예제 #8
0
def fct():
    """Create the ``dfp_friend`` index and load every ``*friends.json`` file.

    Connects to Elasticsearch on localhost:9200, hands the index name and
    schema to ``c.createIndex``, then walks ``<dirpath>/vegaFiles/`` and, for
    each file whose name ends with ``friends.json``, passes whatever
    ``bulkJsonData`` returns to ``helpers.bulk``. The part of the file name
    before the first dot is given to ``bulkJsonData`` as its third argument.

    Each file is loaded best-effort: a failure is reported on stdout (with
    its cause) and the remaining files are still attempted.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # The schema forces specific field types; it is adapted to the content of
    # the files being loaded.
    schema = {
        "mappings": {
            "properties": {
                "name": {"type": "keyword"},
                "screen_name": {"type": "text"},
                # NOTE(review): "number" is not an Elasticsearch field type
                # (numeric types are long, integer, float, double, ...), so
                # this mapping is likely rejected at index creation. Left
                # unchanged because the shape of "tags" is not visible here;
                # pick the right numeric type after checking the data.
                "tags": {"type": "number"},
                "type": {"type": "text"},
            },
        },
    }

    # Create index with a schema
    c.createIndex('dfp_friend', schema, elastic)

    input_folder = dirpath + '/vegaFiles/'

    for root, _dirs, filenames in os.walk(input_folder):
        for filename in filenames:
            if not filename.endswith("friends.json"):
                continue
            # Join with the directory currently being walked, not the top
            # folder, so files found in sub-directories resolve correctly.
            json_path = os.path.join(root, filename)
            file_part = filename.split(".")[0]
            try:
                helpers.bulk(
                    elastic, bulkJsonData(json_path, "dfp_friend", file_part))
            except Exception as err:
                # Catch Exception (not a bare except) so Ctrl-C and
                # SystemExit still propagate, and show the cause.
                print("Error in Insert {} ({!r})".format(json_path, err))
            else:
                print("Insert Friends")
예제 #9
0
def fct():
    """Create the ``dfp_text_fb_posts`` index and bulk-load Facebook posts.

    Connects to Elasticsearch on localhost:9200, hands the index name and
    schema to ``c.createIndex``, then passes whatever ``bulkJsonData`` returns
    for ``your_posts_1.json`` to ``helpers.bulk``.

    The load is best-effort: a failure is reported on stdout (with the file
    and its cause) and the function carries on instead of raising.
    """
    # create a new instance of the Elasticsearch client class
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # The schema forces specific field types and declares an alias; it is
    # adapted to the content of the files being loaded.
    schema = {
        "settings": {
            "analysis": {
                "analyzer": {
                    "my_english_analyzer": {
                        "type": "standard",
                        "stopwords": "_english_",
                    },
                },
            },
        },
        "mappings": {
            "properties": {
                "timestamp": {
                    "type": "date",
                    "format": "EEE MMM dd HH:mm:ss ZZ yyyy||date_optional_time||epoch_second",
                },
                "created_at": {"type": "alias", "path": "timestamp"},
                "all_text": {
                    "type": "text",
                    "analyzer": "my_english_analyzer",
                    "fields": {"keyword": {"type": "keyword"}},
                    "fielddata": True,
                },
                "mySentiment": {"type": "float"},
                "sentPositive": {"type": "float"},
                "sentNegative": {"type": "float"},
                "data": {
                    "properties": {
                        "post": {
                            "type": "text",
                            "analyzer": "my_english_analyzer",
                            "fields": {"keyword": {"type": "keyword"}},
                            "fielddata": True,
                        },
                    },
                },
                # attachments.data.place.location is mapped as a geo point.
                "attachments": {
                    "properties": {
                        "data": {
                            "properties": {
                                "place": {
                                    "properties": {
                                        "location": {"type": "geo_point"},
                                    },
                                },
                            },
                        },
                    },
                },
            },
        },
    }

    # Create index with a schema
    c.createIndex('dfp_text_fb_posts', schema, elastic)

    input_folder = dirpath + "/script/dataSource/json-facebook_data/posts"
    for load_type in ["your_posts_1"]:
        json_path = os.path.join(input_folder, load_type + '.json')
        try:
            helpers.bulk(
                elastic,
                bulkJsonData(json_path, "dfp_text_fb_posts", load_type))
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C and SystemExit
            # still propagate, and say which file failed and why.
            print("Error in Facebook Posts : {} ({!r})".format(json_path, err))
        else:
            print("Insert Facebook Posts")