Ejemplo n.º 1
0
    def render_GET(self, request):
        """
        Render the service list document for a GET request.

        No authorization check is made; everyone is allowed to fetch the
        service list. The base URL used in the document is built from the
        Host header, the request pre-path and whether the request is secure.
        The x-forwarded-port and x-forwarded-protocol headers, when present,
        override the port and scheme so the resource works behind a reverse
        proxy.

        Returns the XML document (XML_HEADER + serialized tree + newline).
        If the client sent no usable Host header, the response code is set
        to 400 and a short plain-text message is returned instead.
        """
        host = get_headers(request, 'host')
        if not host:
            log.msg('Client did not send proper host header.',
                    system='sgas.TopResource')
            # A bare return here ended up as a 500; a missing Host header is
            # the client's fault, so report it as a bad request.
            request.setResponseCode(400)
            return "Client did not send proper host header.\n"

        # stuff needed for being cooperative with a reverse proxy
        # note: once loggers get updated to understand relative paths
        # (currently they only understand complete URLs) these hacks
        # can be removed
        if has_headers(request, 'x-forwarded-port'):
            host += ':' + get_headers(request, 'x-forwarded-port')

        is_secure = request.isSecure()
        if get_headers(request, 'x-forwarded-protocol') == 'https':
            is_secure = True

        basepath = '/'.join(request.prepath)
        baseurl = self._createBaseURL(host, is_secure, basepath)
        tree = self._createServiceTree(baseurl)
        return XML_HEADER + ET.tostring(tree) + "\n"
Ejemplo n.º 2
0
Archivo: app.py Proyecto: fpriyatna/OME
def editor():
    """
    Render the editor page for the file named by the `source` query argument.

    Query arguments read from the request:
      source   -- location of the input file; when absent or blank a fixed
                  set of placeholder headers is used
      format   -- file type handed to util.get_headers (default "csv")
      callback -- callback URL handed to the template (default "")

    When DOWNLOAD is set, the source is fetched with requests and stored under
    UPLOAD_DIR before its headers are read; otherwise the source is passed to
    util.get_headers directly. An unreadable source produces a warning
    message, an inaccessible one an error message; both are handed to the
    template.
    """
    source = request.args.get('source', default=None)
    file_type = request.args.get('format', default="csv")
    callback_url = request.args.get('callback', default="")
    error_msg = None
    warning_msg = None

    # Guard against a missing argument: splitting None raised AttributeError
    # before the blank-source branch below could be reached.
    original_file_name = source.split('/')[-1] if source is not None else ""

    if source is None or source.strip() == '':
        headers = ["AAA", "BBB", "CCC"]
    elif DOWNLOAD:
        # NOTE(review): this fetches a caller-supplied URL from the server
        # side; confirm that is acceptable for the deployment.
        r = requests.get(source, allow_redirects=True)
        if r.status_code == 200:
            # Name the stored copy "<stem>-<random>.<ext>". The extension is
            # taken from the last path segment only, so a URL without a dot
            # in its file name cannot inject a '/' into the stored name.
            fname = original_file_name.split('.')[0] + "-" + util.get_random_string(4)
            if '.' in original_file_name:
                fname += "." + original_file_name.split('.')[-1]
            uploaded_file_dir = os.path.join(UPLOAD_DIR, fname)
            # r.content is raw bytes, so write in binary mode
            with open(uploaded_file_dir, 'wb') as out:
                out.write(r.content)
            headers = util.get_headers(uploaded_file_dir, file_type=file_type)
            if headers == []:
                warning_msg = "Can't parse the source file %s" % source
        else:
            error_msg = "the source %s can not be accessed" % source
            print(error_msg)
            headers = []
    else:
        headers = util.get_headers(source, file_type=file_type)
        if headers == []:
            warning_msg = "Can't parse the source file %s" % source

    with open(os.path.join(DATA_DIR, "labels.txt")) as labels_file:
        labels_txt = labels_file.read()

    return render_template('editor.html',
                           labels_txt=labels_txt,
                           headers=headers,
                           callback=callback_url,
                           file_name=original_file_name,
                           error_msg=error_msg,
                           warning_msg=warning_msg)
Ejemplo n.º 3
0
def import_data(f, record_id, target_id, id=False):
    '''Import the data and convert it to X (input) and y (output) arrays.

    f         -- input handed to read_csv
    record_id -- column name that must be present in the header row
    target_id -- column name that must be present in the header row
    id        -- when False (default) the first column of every row and the
                 first header are dropped; when True they are kept

    Columns are selected by position, not by where record_id / target_id
    were found: the last column is always used as the target and is removed
    from the returned headers.

    Returns (X, y, headers) with X and y converted to float64, or
    (False, False, False) when one of the two column names is missing.
    '''
    rows = read_csv(f)

    # the first row holds the column names
    headers = util.get_headers(next(rows))

    # only check that both columns exist; a missing name raises ValueError
    try:
        headers.index(record_id)
        headers.index(target_id)
    except ValueError:
        print('The specified instance ID was not found as column name. Manually check input file for correct instance ID column.')
        return False, False, False

    # save and split records
    print('  ...(loading)')
    if not id:
        records = [row[1:] for row in rows]
        headers = headers[1:-1]
    else:
        records = list(rows)
        headers = headers[0:-1]
    print('  ...(converting to matrix)')
    records = np.matrix(records)
    X = records[:, 0:-1]  # features

    # output; the builtin float replaces the removed np.float alias
    y = records[:, -1]  # target
    y = np.squeeze(np.asarray(y.astype(float)))

    print('  ...(converting data type)')
    X = X.astype(np.float64, copy=False)
    y = y.astype(np.float64, copy=False)

    return X, y, headers
def import_data(f, record_id, target_id):
    '''Import the data and convert it to X (input) and y (output) arrays.

    f         -- input handed to read_csv
    record_id -- column name that must be present in the header row
    target_id -- column name that must be present in the header row

    Columns are selected by position, not by where record_id / target_id
    were found: the first column of every row is dropped, the last column is
    used as the target, and both are removed from the returned headers.

    The target values are parsed as integers before being stored as float64.

    Returns (X, y, headers), or (False, False, False) when one of the two
    column names is missing.
    '''
    rows = read_csv(f)

    # the first row holds the column names
    headers = util.get_headers(next(rows))

    # only check that both columns exist; a missing name raises ValueError
    try:
        headers.index(record_id)
        headers.index(target_id)
    except ValueError:
        print('The specified instance ID was not found as column name. Manually check input file for correct instance ID column.')
        return False, False, False

    # save and split records
    print('  ...(loading)')
    records = [row[1:] for row in rows]
    print('  ...(converting to matrix)')
    records = np.matrix(records)
    X = records[:, 0:-1]  # features
    headers = headers[1:-1]

    # output; the builtin int replaces the removed np.int alias
    y = records[:, -1]  # target
    y = np.squeeze(np.asarray(y.astype(int)))

    print('  ...(converting data type)')
    X = X.astype(np.float64, copy=False)
    y = y.astype(np.float64, copy=False)

    return X, y, headers
Ejemplo n.º 5
0
def getSubject(request):
    """
    Return the subject name carried by a twisted.web.http.Request, if any.

    The X_SSL_SUBJECT header is only trusted when the client IP is one of
    LOOPBACK_ADDRESSES (identity forwarded by a reverse proxy); otherwise
    None is returned.
    """
    from_local_proxy = request.getClientIP() in LOOPBACK_ADDRESSES
    if not (from_local_proxy and has_headers(request, X_SSL_SUBJECT)):
        # request wasn't secure or no certificate was presented
        return None

    return get_headers(request, X_SSL_SUBJECT)
Ejemplo n.º 6
0
def getSubject(request):
    """
    Extract the subject name from a twisted.web.http.Request.

    Returns the value of the X_SSL_SUBJECT header when the request comes
    from one of LOOPBACK_ADDRESSES and carries that header, else None.
    """
    # only a reverse proxy on the loopback interface may forward an identity
    if request.getClientIP() not in LOOPBACK_ADDRESSES:
        return None

    # request wasn't secure or no certificate was presented
    if not has_headers(request, X_SSL_SUBJECT):
        return None

    return get_headers(request, X_SSL_SUBJECT)
Ejemplo n.º 7
0
    def render_GET(self, request):
        """
        Return the service list as an XML document.

        There is no authorization check: everyone may fetch the service
        list. The advertised base URL is derived from the Host header, the
        request pre-path and the secure flag. The x-forwarded-port and
        x-forwarded-protocol headers take precedence for port and scheme
        when a reverse proxy supplies them.

        A request without a usable Host header is answered with status 400
        and a short plain-text body.
        """
        host = get_headers(request, 'host')
        if not host:
            log.msg('Client did not send proper host header.', system='sgas.TopResource')
            # Returning nothing here surfaced as a 500; the request itself is
            # malformed, so answer with 400 and an explanatory body.
            request.setResponseCode(400)
            return "Client did not send proper host header.\n"

        # stuff needed for being cooperative with a reverse proxy
        # note: once loggers get updated to understand relative paths
        # (currently they only understand complete URLs) these hacks
        # can be removed
        if has_headers(request, 'x-forwarded-port'):
            host += ':' + get_headers(request, 'x-forwarded-port')

        is_secure = request.isSecure()
        if get_headers(request, 'x-forwarded-protocol') == 'https':
            is_secure = True

        basepath = '/'.join(request.prepath)
        baseurl = self._createBaseURL(host, is_secure, basepath)
        tree = self._createServiceTree(baseurl)
        return XML_HEADER + ET.tostring(tree) + "\n"
Ejemplo n.º 8
0
def main(couch_params, main_db, avatar_db, master_params, wikifeat_home):
    """
    Set up the databases on the couchdb server described by couch_params.

    Stores the GET/PUT request headers and the wikifeat home directory in
    the module globals gh, ph and wf_dir, connects to the server, sets up
    the main, user and avatar databases, and creates the master user unless
    master_params.skip_master says otherwise.
    """
    global gh, ph, wf_dir

    # Set up credentials
    creds = util.get_credentials(couch_params.adminuser, couch_params.adminpass)
    gh = util.get_headers(creds)
    ph = util.put_headers(creds)
    wf_dir = wikifeat_home

    # Establish a connection to couchdb
    connection = util.get_connection(
        couch_params.use_ssl,
        couch_params.host,
        couch_params.port,
    )
    connection.connect()

    setup_main_db(connection, main_db)
    setup_user_db(connection)
    setup_avatar_db(connection, avatar_db)

    # only an explicit False triggers creation of the master user
    if master_params.skip_master is not False:
        return
    create_master_user(connection, master_params)
Ejemplo n.º 9
0
def main(couch_params, main_db, avatar_db, master_params, wikifeat_home):
    """
    Prepare the couchdb databases.

    Side effects: sets the module globals gh and ph (request headers built
    from the admin credentials) and wf_dir (the wikifeat home directory),
    then opens a connection and runs the main, user and avatar database
    setup steps. The master user is created only when
    master_params.skip_master is exactly False.
    """
    global gh, ph, wf_dir

    # Set up credentials
    admin_credentials = util.get_credentials(couch_params.adminuser,
                                             couch_params.adminpass)
    gh = util.get_headers(admin_credentials)
    ph = util.put_headers(admin_credentials)
    wf_dir = wikifeat_home

    # Establish a connection to couchdb
    use_ssl, host, port = (couch_params.use_ssl, couch_params.host,
                           couch_params.port)
    couch = util.get_connection(use_ssl, host, port)
    couch.connect()

    setup_main_db(couch, main_db)
    setup_user_db(couch)
    setup_avatar_db(couch, avatar_db)

    create_master = master_params.skip_master is False
    if create_master:
        create_master_user(couch, master_params)
Ejemplo n.º 10
0
def getHostname(request):
    """
    Utility method for getting hostname of client.

    Handles requests forwarded by a local reverse proxy: when the client IP
    is in LOOPBACK_ADDRESSES and the X_FORWARDED_FOR header is present, the
    forwarded address is returned, resolved to a hostname where possible.
    If the forwarded value is not an IP address, or the reverse lookup
    fails, the forwarded value itself is returned.

    For requests that do not match that condition, the code visible here
    falls through and returns None.
    """
    if request.getClientIP() in LOOPBACK_ADDRESSES and has_headers(request, X_FORWARDED_FOR):
        # nginx typically returns ip addresses
        addr = get_headers(request, X_FORWARDED_FOR)
        if not isIPAddress(addr):
            # Not an IP address, so hand it back unchanged. This branch used
            # to evaluate `addr` without returning it, yielding None.
            return addr

        # we really shouldn't do such blocking calls in twisted,
        # but the twisted dns interface is rather terrible and
        # odd things happen when using it
        # Set timeout to 1 second to limit the possible damage
        # NOTE(review): setdefaulttimeout changes the process-wide default
        # for new sockets; confirm it actually bounds gethostbyaddr.
        try:
            socket.setdefaulttimeout(1)
            info = socket.gethostbyaddr(addr)
            return info[0]
        except socket.error as msg:
            log.msg("Error performing reverse lookup: %s" % msg)
            return addr
Ejemplo n.º 11
0
def getHostname(request):
    """
    Utility method for getting hostname of client.

    When the request arrives from one of LOOPBACK_ADDRESSES and carries the
    X_FORWARDED_FOR header, the forwarded value is used: an IP address is
    reverse-resolved to a hostname, and anything else is returned as is. A
    failed reverse lookup is logged and the address is returned instead.

    Requests that do not match that condition fall through the code visible
    here and yield None.
    """
    forwarded = (request.getClientIP() in LOOPBACK_ADDRESSES
                 and has_headers(request, X_FORWARDED_FOR))
    if forwarded:
        # nginx typically returns ip addresses
        addr = get_headers(request, X_FORWARDED_FOR)
        if isIPAddress(addr):
            # we really shouldn't do such blocking calls in twisted,
            # but the twisted dns interface is rather terrible and
            # odd things happen when using it
            # Set timeout to 1 second to limit the possible damage
            # NOTE(review): setdefaulttimeout changes the process-wide
            # default for new sockets; confirm it bounds gethostbyaddr.
            try:
                socket.setdefaulttimeout(1)
                info = socket.gethostbyaddr(addr)
                return info[0]
            except socket.error as msg:
                log.msg("Error performing reverse lookup: %s" % msg)
                return addr
        else:
            # The value was previously evaluated but never returned, so a
            # forwarded hostname came back as None.
            return addr
Ejemplo n.º 12
0
def insert_artist_genres():
    """
    Fetch the genres of every stored artist and insert them as rows.

    Reads all ids from the artists table, queries the Spotify artists
    endpoint in batches of 50 ids, and inserts one
    {'artist_id', 'genre'} row per genre into artist_genres. Commits, closes
    the cursor and then exits the process with status 0.
    """
    conn, cursor = util.connect2RDS()
    headers = util.get_headers(client_id, client_secret)

    cursor.execute("SELECT id FROM artists")
    artist_ids = [artist_id for (artist_id, ) in cursor.fetchall()]

    batch_size = 50
    genre_rows = []
    for start in range(0, len(artist_ids), batch_size):
        joined_ids = ','.join(artist_ids[start:start + batch_size])
        url = "https://api.spotify.com/v1/artists/?ids={}".format(joined_ids)

        response = requests.get(url, headers=headers)
        payload = json.loads(response.text)

        genre_rows.extend(
            {'artist_id': artist['id'], 'genre': genre}
            for artist in payload['artists']
            for genre in artist['genres']
        )

    for row in genre_rows:
        util.insert_row(cursor, row, 'artist_genres')

    conn.commit()
    cursor.close()

    sys.exit(0)
Ejemplo n.º 13
0
def insert_artists():
    """
    Look up every artist named in artist_list.csv and insert the matches.

    The first column of each CSV row is used as the artist name. For each
    name the Spotify search endpoint is queried with limit 1. A row is
    inserted into the artists table only when the returned artist's name
    equals the queried name exactly. Artists whose result cannot be
    processed are logged and skipped. Commits and then exits the process
    with status 0.
    """
    conn, cursor = util.connect2RDS()
    headers = util.get_headers()

    with open('artist_list.csv') as f:
        # skip blank lines, which the csv reader yields as empty rows
        artists = [row[0] for row in csv.reader(f) if row]

    for name in artists:
        params = {"q": name, "type": "artist", "limit": "1"}
        r = requests.get("https://api.spotify.com/v1/search",
                         params=params,
                         headers=headers)
        raw = json.loads(r.text)
        try:
            artist_raw = raw['artists']['items'][0]
            if artist_raw['name'] == params['q']:
                artist = {
                    'id': artist_raw['id'],
                    'name': artist_raw['name'],
                    'followers': artist_raw['followers']['total'],
                    'popularity': artist_raw['popularity'],
                    'url': artist_raw['external_urls']['spotify'],
                    'image_url': artist_raw['images'][0]['url'],
                }
                util.insert_row(cursor, artist, 'artists')
        except Exception:
            # Best effort: one bad artist must not abort the import. Catching
            # Exception (not everything) keeps Ctrl-C and sys.exit working,
            # and the traceback records what actually went wrong.
            logging.exception('could not import artist %r', name)

    conn.commit()
    sys.exit(0)
Ejemplo n.º 14
0
def editor():
    """
    Store the submitted input file and render the editor page for it.

    Form fields read from the request:
      format     -- file type handed to util.get_headers (default 'csv')
      callback   -- callback URL handed to the template (default "")
      ontologies -- list of selected ontologies; at least one is required
      source     -- URL of the input file; when absent or blank, the
                    uploaded file `sourcefile` is used instead

    The input is saved under UPLOAD_DIR with a random prefix. An error page
    (msg.html) is rendered when no ontology is selected, no input is
    supplied, the URL cannot be fetched, or the file cannot be parsed.
    """
    file_type = request.form.get('format', 'csv')
    callback_url = request.form.get('callback', "")
    ontologies = request.form.getlist('ontologies')
    if not ontologies:
        return render_template('msg.html',
                               msg="You should select at least one ontology",
                               msg_title="Error")
    print("number of ontologies: " + str(len(ontologies)))
    print(ontologies)
    print(request.form)
    error_msg = None
    warning_msg = None

    if 'source' not in request.form or request.form['source'].strip() == "":
        uploaded = False
        if 'sourcefile' in request.files:
            sourcefile = request.files['sourcefile']
            if sourcefile.filename != "":
                filename = secure_filename(sourcefile.filename)
                fname = util.get_random_string(4) + "-" + filename
                uploaded_file_dir = os.path.join(UPLOAD_DIR, fname)
                if not os.path.isdir(UPLOAD_DIR):
                    os.makedirs(UPLOAD_DIR)
                sourcefile.save(uploaded_file_dir)
                uploaded = True
            else:
                print("blank source file")
        else:
            print('not sourcefile')
        if not uploaded:
            return render_template('msg.html',
                                   msg="Expecting an input file",
                                   msg_title="Error")
    else:
        source = request.form['source']
        filename = secure_filename(source.split('/')[-1])
        # NOTE(review): this fetches a caller-supplied URL from the server
        # side; confirm that is acceptable for the deployment.
        r = requests.get(source, allow_redirects=True)
        if r.status_code != 200:
            error_msg = "the source %s can not be accessed" % source
            print(error_msg)
            return render_template('msg.html',
                                   msg=error_msg,
                                   msg_title="Error")
        fname = util.get_random_string(4) + "-" + filename
        uploaded_file_dir = os.path.join(UPLOAD_DIR, fname)
        # the upload directory may not exist yet on this path either
        if not os.path.isdir(UPLOAD_DIR):
            os.makedirs(UPLOAD_DIR)
        # r.content is raw bytes, so the file must be opened in binary mode
        with open(uploaded_file_dir, 'wb') as out:
            out.write(r.content)

    headers = util.get_headers(uploaded_file_dir, file_type=file_type)
    if headers == []:
        error_msg = "Can't parse the source file "
        return render_template('msg.html', msg=error_msg, msg_title="Error")
    labels = util.get_classes_as_txt(ontologies, data_dir=DATA_DIR)
    return render_template('editor.html',
                           labels_txt=labels,
                           ontologies_txt=",".join(ontologies),
                           headers=headers,
                           callback=callback_url,
                           file_name=fname,
                           error_msg=error_msg,
                           warning_msg=warning_msg)
Ejemplo n.º 15
0
def headers(conf_path):
    """Return get_headers(conf_path) with two extra column names appended."""
    extra_columns = ["issued eai", "currency seat ordinal"]
    return get_headers(conf_path) + extra_columns
Ejemplo n.º 16
0
def toS3():
    """
    Export top tracks and their audio features to S3 as parquet files.

    For every id in the artists table the Spotify top-tracks endpoint is
    queried (country US) and one row per track is collected with the fields
    id, name, popularity, external_urls and artist_id. The rows are written
    to top-tracks.parquet and uploaded to the kihong-spotify-lambda bucket
    under a dt=<UTC date> prefix. The audio features of those tracks are then
    fetched in batches of 100 ids, written to audio-features.parquet and
    uploaded the same way.
    """
    conn, cursor = util.connect2RDS()
    headers = util.get_headers()

    cursor.execute("SELECT id FROM artists")

    # output column -> jsonpath expression evaluated against a track object
    top_track_keys = {
        'id': 'id',
        'name': 'name',
        'popularity': 'popularity',
        'external_urls': 'external_urls.spotify',
    }

    top_tracks = []
    for (artist_id, ) in cursor.fetchall():
        url = "https://api.spotify.com/v1/artists/{}/top-tracks".format(artist_id)
        params = {'country': 'US'}
        r = requests.get(url, params=params, headers=headers)
        raw = json.loads(r.text)

        for track in raw['tracks']:
            top_track = {
                key: jsonpath.jsonpath(track, path)[0]
                for key, path in top_track_keys.items()
            }
            top_track['artist_id'] = artist_id
            # Append once per track. The append used to sit inside the
            # per-key loop, which stored every track four times.
            top_tracks.append(top_track)

    # collect the ids before the rows are turned into a DataFrame so an
    # empty result simply yields an empty list
    track_ids = [track['id'] for track in top_tracks]

    top_tracks = pd.DataFrame(top_tracks)
    top_tracks.to_parquet('top-tracks.parquet', engine='pyarrow', compression='snappy')

    dt = datetime.datetime.utcnow().strftime("%Y-%m-%d")

    s3 = boto3.resource('s3')
    s3_object = s3.Object('kihong-spotify-lambda',
                          'top-tracks/dt={}/top-tracks.parquet'.format(dt))
    with open('top-tracks.parquet', 'rb') as data:
        s3_object.put(Body=data)

    tracks_batch = [track_ids[i:i + 100] for i in range(0, len(track_ids), 100)]

    audio_features = []
    for batch in tracks_batch:
        ids = ','.join(batch)
        url = "https://api.spotify.com/v1/audio-features/?ids={}".format(ids)

        r = requests.get(url, headers=headers)
        raw = json.loads(r.text)

        audio_features.extend(raw['audio_features'])

    audio_features = pd.DataFrame(audio_features)
    audio_features.to_parquet('audio-features.parquet', engine='pyarrow', compression='snappy')

    s3_object = s3.Object('kihong-spotify-lambda',
                          'audio-features/dt={}/audio-features.parquet'.format(dt))
    with open('audio-features.parquet', 'rb') as data:
        s3_object.put(Body=data)
Ejemplo n.º 17
0
    def editor():
        """
        Store the submitted input file and render the editor page for it.

        Form fields read from the request:
          format     -- file type handed to util.get_headers (default 'csv')
          callback   -- callback URL handed to the template (default "")
          kg         -- optional value handed to the template; blank counts
                        as not given
          ontologies -- list of selected ontologies; at least one is required
          source     -- URL of the input file; when absent or blank, the
                        uploaded file `sourcefile` is used instead

        The input is saved under UPLOAD_DIR with a random prefix. Class
        labels are collected per ontology from DATA_DIR, falling back to
        ONT_DIR when that lookup raises. An error page (msg.html) is rendered
        when no ontology is selected, no input is supplied, the URL cannot be
        fetched, or the file cannot be parsed.
        """
        file_type = request.form.get('format', 'csv')
        callback_url = request.form.get('callback', "")
        # a blank kg field is treated the same as a missing one
        kg = request.form.get('kg', "").strip() or None

        ontologies = request.form.getlist('ontologies')
        if not ontologies:
            return render_template(
                'msg.html',
                msg="You should select at least one ontology",
                msg_title="Error")
        logger.debug("number of ontologies: " + str(len(ontologies)))
        logger.debug(str(ontologies))
        logger.debug(str(request.form))
        error_msg = None
        warning_msg = None

        if 'source' not in request.form or request.form['source'].strip() == "":
            uploaded = False
            if 'sourcefile' in request.files:
                sourcefile = request.files['sourcefile']
                if sourcefile.filename != "":
                    filename = secure_filename(sourcefile.filename)
                    fname = util.get_random_string(4) + "-" + filename
                    uploaded_file_dir = os.path.join(UPLOAD_DIR, fname)
                    # exist_ok avoids the check-then-create race
                    os.makedirs(UPLOAD_DIR, exist_ok=True)
                    sourcefile.save(uploaded_file_dir)
                    uploaded = True
                else:
                    logger.debug("blank source file")
            else:
                logger.debug('not sourcefile')
            if not uploaded:
                return render_template('msg.html',
                                       msg="Expecting an input file",
                                       msg_title="Error")
        else:
            source = request.form['source']
            filename = secure_filename(source.split('/')[-1])
            # NOTE(review): this fetches a caller-supplied URL from the
            # server side; confirm that is acceptable for the deployment.
            r = requests.get(source, allow_redirects=True)
            if r.status_code != 200:
                error_msg = "the source %s can not be accessed" % source
                logger.debug(error_msg)
                return render_template('msg.html',
                                       msg=error_msg,
                                       msg_title="Error")
            fname = util.get_random_string(4) + "-" + filename
            uploaded_file_dir = os.path.join(UPLOAD_DIR, fname)
            os.makedirs(UPLOAD_DIR, exist_ok=True)
            # the context manager closes the file even if the write fails
            with open(uploaded_file_dir, 'w', encoding='utf-8') as out:
                out.write(r.text)

        headers = util.get_headers(uploaded_file_dir, file_type=file_type)
        if headers == []:
            error_msg = "Can't parse the source file "
            return render_template('msg.html',
                                   msg=error_msg,
                                   msg_title="Error")

        logger.debug("headers: ")
        logger.debug(str(headers))

        label_parts = []
        for ontology in ontologies:
            try:
                o_labels = util.get_classes_as_txt([ontology], data_dir=DATA_DIR)
            except Exception:
                # Fall back to the ontology directory. Catching Exception
                # rather than everything lets KeyboardInterrupt and
                # SystemExit propagate.
                o_labels = util.get_classes_as_txt([ontology], data_dir=ONT_DIR)
            if o_labels:
                label_parts.append(o_labels)
        labels = "".join(label_parts)
        logger.debug("labels: ")
        logger.debug(str(labels))

        return render_template('editor.html',
                               labels_txt=labels,
                               ontologies_txt=",".join(ontologies),
                               headers=headers,
                               kg=kg,
                               callback=callback_url,
                               file_name=fname,
                               error_msg=error_msg,
                               warning_msg=warning_msg)