Esempio n. 1
0
def download_crawler_data(date):
    """
    Download and unpack the raw crawler data for a day.

    The archive ``CRAWLER_URL + date + ".tar.gz"`` is fetched with
    make_http_request, written into DEFAULT_DATA_DIRECTORY and extracted
    there. The extracted ``DEFAULT_LOCATION + date`` directory is then moved
    into DEFAULT_DATA_DIRECTORY. The downloaded archive is always deleted
    afterwards, even when extraction or the move fails.

    @param date : Formatted date AAAA_MM_JJ, such as 2015_01_03 (str).
    """
    archive_name = date + ".tar.gz"
    full_url = CRAWLER_URL + archive_name
    print("downloading : " + full_url)
    r = make_http_request(full_url)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(DEFAULT_DATA_DIRECTORY, exist_ok=True)
    archive_path = os.path.join(DEFAULT_DATA_DIRECTORY, archive_name)
    with open(archive_path, 'wb') as f:
        f.write(r.content)
    try:
        # Untar and extract in the same directory. The context manager
        # closes the tar handle even if extraction raises.
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; confirm the crawler server is a trusted source.
        with tarfile.open(archive_path, "r:gz") as tar:
            tar.extractall(DEFAULT_DATA_DIRECTORY)
        # Move the extracted directory up into the data directory.
        try:
            shutil.move(
                os.path.join(DEFAULT_DATA_DIRECTORY, DEFAULT_LOCATION + date),
                DEFAULT_DATA_DIRECTORY)
        except shutil.Error:
            # Best effort: report and carry on, as before.
            print("Error, maybe the directory " + date + " already exists")
    finally:
        # Never leave the downloaded archive behind.
        os.remove(archive_path)
    # The leftover "srv" tree under DEFAULT_DATA_DIRECTORY is deliberately
    # kept; removing it with shutil.rmtree works but is not useful.
    print("Success with : " + date)
Esempio n. 2
0
def download_crawler_data(date):
    """
    Download and unpack the raw crawler data for a day.

    The archive ``CRAWLER_URL + date + ".tar.gz"`` is fetched with
    make_http_request, saved into DEFAULT_DATA_DIRECTORY and extracted there.
    The extracted ``DEFAULT_LOCATION + date`` directory is then moved into
    DEFAULT_DATA_DIRECTORY. The downloaded archive is removed in every case,
    including when extraction or the move raises.

    @param date : Formatted date AAAA_MM_JJ, such as 2015_01_03 (str).
    """
    archive_name = date + ".tar.gz"
    full_url = CRAWLER_URL + archive_name
    print("downloading : " + full_url)
    r = make_http_request(full_url)
    # exist_ok replaces the racy "isdir() then makedirs()" sequence.
    os.makedirs(DEFAULT_DATA_DIRECTORY, exist_ok=True)
    archive_path = os.path.join(DEFAULT_DATA_DIRECTORY, archive_name)
    with open(archive_path, 'wb') as f:
        f.write(r.content)
    try:
        # Untar and extract in the same directory; "with" guarantees the
        # tar handle is closed even when extractall() fails.
        # NOTE(review): extractall() honours the paths recorded in the
        # archive; confirm the download source is trusted.
        with tarfile.open(archive_path, "r:gz") as tar:
            tar.extractall(DEFAULT_DATA_DIRECTORY)
        # Move the extracted directory up into the data directory.
        try:
            shutil.move(
                os.path.join(DEFAULT_DATA_DIRECTORY, DEFAULT_LOCATION + date),
                DEFAULT_DATA_DIRECTORY)
        except shutil.Error:
            # Best effort: report the problem and continue, as before.
            print("Error, maybe the directory " + date + " already exists")
    finally:
        # The archive is only a temporary artefact; always delete it.
        os.remove(archive_path)
    # The leftover "srv" tree under DEFAULT_DATA_DIRECTORY is deliberately
    # kept; removing it with shutil.rmtree works but is not useful.
    print("Success with : " + date)
Esempio n. 3
0
File: api.py Progetto: Nadrieril/psc
def search_source(source_uri='/s/wordnet/3.0'):
    """
    Query the lookup service for the statements of a given source.

    The request URL is ``settings.BASE_LOOKUP_URL`` immediately followed by
    ``source_uri``; the value returned by make_http_request is handed back
    without any parsing.

    :param source_uri: a uri specifying the source, e.g.
        '/s/contributor/omcs/rspeer', '/s/wordnet/3.0', '/s/rule/sum_edges' etc.
    :return: the unparsed result of make_http_request for that URL.
    """
    lookup_url = '%s%s' % (settings.BASE_LOOKUP_URL, source_uri)
    return make_http_request(lookup_url)
Esempio n. 4
0
File: api.py Progetto: Nadrieril/psc
def get_similarity(concept1='dog', concept2='dog'):
    """
    Return a similarity score between two concepts.

    An association query is issued for ``concept1`` with a filter built from
    the configured language and ``concept2``. The response is passed to
    parse_similar_concepts; the score of its first entry is returned, or 0
    when it yields nothing.
    """
    concept_filter = ''.join(('/c/', settings.LANGUAGE, '/', concept2))
    query_string = urllib.parse.urlencode({"filter": concept_filter})
    endpoint = '%s/c/%s/%s?' % (
        settings.BASE_ASSOCIATION_URL, settings.LANGUAGE, concept1)
    response = make_http_request(endpoint + query_string)
    matches = parse_similar_concepts(response)
    # Guard clause: no match means no measurable similarity.
    if not matches:
        return 0
    return matches[0][1]
Esempio n. 5
0
File: api.py Progetto: Nadrieril/psc
def get_similar_concepts(concept='dog', filter='/c/en/', limit=10, **kwargs):
    """
    Run an association query for ``concept`` and parse the response.

    Every extra keyword argument must be listed in
    settings.SUPPORTED_ASSOCIATION_ARGS; the first one that is not makes the
    call raise an Exception before any request is sent.

    :return: whatever parse_similar_concepts builds from the response.
    """
    # Reject unsupported arguments up front, in the order they were given.
    for name in kwargs:
        if name not in settings.SUPPORTED_ASSOCIATION_ARGS:
            raise Exception("Association argument '"+name+"' incorrect.")
    params = {"filter" : filter, "limit" : limit}
    params.update(kwargs)
    query_string = urllib.parse.urlencode(params)
    request_url = '%s/c/%s/%s?%s' % (
        settings.BASE_ASSOCIATION_URL, settings.LANGUAGE, concept,
        query_string)
    return parse_similar_concepts(make_http_request(request_url))
Esempio n. 6
0
File: api.py Progetto: Nadrieril/psc
def search_edges(filter='/c/en/', limit=10, **kwargs):
    """
    Run a search query and parse the response into edges.

    Extra keyword arguments are accepted only when they appear in
    settings.SUPPORTED_SEARCH_ARGS; the first unsupported one raises an
    Exception before any request is sent.

    :rtype: Edge list
    """
    params = {"filter" : filter, "limit" : limit}
    for name, value in kwargs.items():
        if name not in settings.SUPPORTED_SEARCH_ARGS:
            raise Exception("Search argument '"+name+"' incorrect.")
        params[name] = value
    query_string = urllib.parse.urlencode(params)
    request_url = '%s?%s' % (settings.BASE_SEARCH_URL, query_string)
    response = make_http_request(request_url)
    return parse_relevant_edges(response)
Esempio n. 7
0
File: api.py Progetto: Nadrieril/psc
def search_concept(concept, limit=1, **kwargs):
    """
    Look up a concept and return the edges parsed from the JSON response.

    Spaces in ``concept`` are replaced by underscores before it is placed in
    the URL. Each extra keyword argument is checked with is_arg_valid against
    settings.SUPPORTED_LOOKUP_ARGS; the first rejected one raises an
    Exception.

    :param concept: a concept word or phrase, e.g. 'toast', 'see movie' etc.
    :param limit: value sent as the ``limit`` query argument.
    :return: the result of parse_relevant_edges on the decoded response.
    """
    params = {"limit" : limit}
    for name, value in kwargs.items():
        if not is_arg_valid(name, settings.SUPPORTED_LOOKUP_ARGS):
            raise Exception("LookUp argument '"+name+"' incorrect.")
        params[name] = value
    query_string = urllib.parse.urlencode(params)
    slug = concept.replace(' ', '_')
    request_url = '%s/c/%s/%s?%s' % (
        settings.BASE_LOOKUP_URL, settings.LANGUAGE, slug, query_string)
    response = make_http_request(request_url)
    return parse_relevant_edges(response.json())
Esempio n. 8
0
File: api.py Progetto: PSC-INF02/psc
def get_similarity(concept1='dog', concept2='dog'):
    """
    Query the association service for how similar two concepts are.

    The request targets ``concept1`` and is filtered on ``concept2`` in the
    configured language. The response goes through parse_similar_concepts
    and the score of its first entry is used.

    @param concept1 First concept.
    @param concept2 Second concept.
    @return The score of the first parsed entry, or 0 if there is none.
    """
    target = ''.join(('/c/', settings.LANGUAGE, "/", concept2))
    encoded = urllib.parse.urlencode({"filter": target})
    url = '%s/c/%s/%s?%s' % (
        settings.BASE_ASSOCIATION_URL, settings.LANGUAGE, concept1, encoded)
    scored = parse_similar_concepts(make_http_request(url))
    return scored[0][1] if scored else 0
Esempio n. 9
0
File: api.py Progetto: PSC-INF02/psc
def search_edges(filter='/c/en/', limit=10, **kwargs):
    """
    Send a search query and turn the response into edges.
    @see settings.SUPPORTED_SEARCH_ARGS

    @param filter Value of the ``filter`` query argument.
    @param limit Value of the ``limit`` query argument.
    @param kwargs Other search arguments; each key must be listed in
                  settings.SUPPORTED_SEARCH_ARGS, otherwise an Exception
                  is raised.
    @return The result of parse_relevant_edges on the response.
    """
    # Fail on the first unsupported key before building anything.
    for name in kwargs:
        if name not in settings.SUPPORTED_SEARCH_ARGS:
            raise Exception("Search argument '" + name + "' incorrect.")
    arguments = {"filter": filter, "limit": limit}
    arguments.update(kwargs)
    encoded = urllib.parse.urlencode(arguments)
    url = '%s?%s' % (settings.BASE_SEARCH_URL, encoded)
    return parse_relevant_edges(make_http_request(url))
Esempio n. 10
0
File: api.py Progetto: PSC-INF02/psc
def get_similar_concepts_by_term_list(term_list,
                                      filter='/c/en/',
                                      limit=10,
                                      **kwargs):
    """
    Run an association query on a list of terms and parse the response.
    Example : http://conceptnet5.media.mit.edu/data/5.3/assoc/list/en/wayne_rooney,sport

    @param term_list Terms to query; they are joined with commas in the URL.
    @param filter Value of the ``filter`` query argument.
    @param limit Value of the ``limit`` query argument.
    @param kwargs Other association arguments; each key must be listed in
                  settings.SUPPORTED_ASSOCIATION_ARGS, otherwise an
                  Exception is raised.
    @return The result of parse_similar_concepts on the response.
    """
    joined_terms = ','.join(term_list)
    arguments = {"filter": filter, "limit": limit}
    for name, value in kwargs.items():
        if name not in settings.SUPPORTED_ASSOCIATION_ARGS:
            raise Exception("Association argument '" + name + "' incorrect.")
        arguments[name] = value
    encoded = urllib.parse.urlencode(arguments)
    url = '%s/list/%s/%s?%s' % (
        settings.BASE_ASSOCIATION_URL, settings.LANGUAGE, joined_terms,
        encoded)
    response = make_http_request(url)
    return parse_similar_concepts(response)
Esempio n. 11
0
File: api.py Progetto: PSC-INF02/psc
def get_similar_concepts(concept='dog', filter='/c/en/', limit=10, **kwargs):
    """
    Send an association query for a concept and parse the response.
    @see settings.SUPPORTED_ASSOCIATION_ARGS

    @param concept Word or phrase placed in the request URL.
    @param filter Value of the ``filter`` query argument.
    @param limit Value of the ``limit`` query argument.
    @param kwargs Other association arguments; an unsupported key raises
                  an Exception.
    @return The result of parse_similar_concepts on the response.
    """
    arguments = {"filter": filter, "limit": limit}
    for name, value in kwargs.items():
        # Guard clause: stop at the first argument the service does not know.
        if name not in settings.SUPPORTED_ASSOCIATION_ARGS:
            raise Exception("Association argument '" + name + "' incorrect.")
        arguments[name] = value
    encoded = urllib.parse.urlencode(arguments)
    endpoint = '%s/c/%s/%s?' % (
        settings.BASE_ASSOCIATION_URL, settings.LANGUAGE, concept)
    response = make_http_request(endpoint + encoded)
    return parse_similar_concepts(response)
Esempio n. 12
0
File: api.py Progetto: PSC-INF02/psc
def search_concept(concept, limit=1, **kwargs):
    """
    Send a lookup query for a concept and parse the JSON response into edges.
    @see result.py
    @see settings.SUPPORTED_LOOKUP_ARGS

    @param concept  A concept, word or phrase, e.g. 'toast', 'see movie' etc.
                    Spaces are replaced by underscores in the URL.
    @param limit    Value of the ``limit`` query argument.
    @param kwargs   Other lookup arguments; an unsupported key raises an
                    Exception.
    @return The result of parse_relevant_edges on the decoded response.
    """
    # Validate every extra argument before building the request.
    for name in kwargs:
        if name not in settings.SUPPORTED_LOOKUP_ARGS:
            raise Exception("LookUp argument '" + name + "' incorrect.")
    arguments = {"limit": limit}
    arguments.update(kwargs)
    encoded = urllib.parse.urlencode(arguments)
    slug = concept.replace(' ', '_')
    url = '%s/c/%s/%s?%s' % (
        settings.BASE_LOOKUP_URL, settings.LANGUAGE, slug, encoded)
    payload = make_http_request(url).json()
    return parse_relevant_edges(payload)