Example #1
def google_colab_authenticate_user():
    try:
        from google.colab import auth
        auth.authenticate_user()
    except Exception as e:
        print("not in googole colab environment!")
def export_model(
    model_name='117M',
    seed=None,
    nsamples=0,
    batch_size=1,
    length=None,
    top_k=0,
    version=1,
    folder_id=None,
):
    """
    Export the trained model as a TensorFlow SavedModel for serving
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to
     reproduce results
    :nsamples=0 : Number of samples to return, if 0, continues to
     generate samples indefinitely.
    :batch_size=1 : Number of batches (only affects speed/memory).
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    :version=1 : Integer value giving the version the model is exported as.
    :folder_id=None : If Google Drive is being used, the ID of the folder to
     upload to. Otherwise, keep as None.
    """
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)
        temperature = tf.placeholder("float", [1])
        output_tensor = sample.sample_sequence(
            hparams=hparams,
            length=length,
            start_token=enc.encoder['<|endoftext|>'],
            batch_size=batch_size,
            temperature=temperature,
            top_k=top_k)[:, 1:]

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        def export_model(
                path):  #Thanks Siraj! Couldn't have done it without you!
            #Link is https://github.com/llSourcell/How-to-Deploy-a-Tensorflow-Model-in-Production/blob/master/custom_model.py
            print("Exporting trained model to ", path)
            builder = saved_model_builder.SavedModelBuilder(path)
            input_temperature = utils.build_tensor_info(temperature)
            output = utils.build_tensor_info(output_tensor)
            prediction_signature = signature_def_utils.build_signature_def(
                inputs={'temperature': input_temperature},
                outputs={'output': output},
                method_name=signature_constants.PREDICT_METHOD_NAME)
            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={'predict': prediction_signature},
                main_op=tf.tables_initializer())
            builder.save()

        base_directory = "./export_model"
        export_path = f"./export_model/{version}"
        export_model(export_path)
        if folder_id is not None:
            from pydrive.auth import GoogleAuth
            from pydrive.drive import GoogleDrive
            from google.colab import auth
            from oauth2client.client import GoogleCredentials

            # 1. Authenticate and create the PyDrive client.
            auth.authenticate_user()
            gauth = GoogleAuth()
            gauth.credentials = GoogleCredentials.get_application_default()
            drive = GoogleDrive(gauth)
            for content in os.listdir(export_path):
                f = drive.CreateFile(
                    {"parents": [{
                        "kind": "drive#fileLink",
                        "id": folder_id
                    }]})
                f.SetContentFile(f"{export_path}" + "/" + content)
                f.Upload()
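For reference, a minimal usage sketch of the function above (hedged: it assumes the GPT-2 `encoder`, `model` and `sample` modules are importable and the `models/117M` checkpoint is present locally, as the function itself requires; the argument values are illustrative only):

# Hypothetical invocation: export version 1 of the 117M model locally,
# skipping the Google Drive upload (folder_id=None).
google_colab_authenticate_user()  # no-op outside Colab
export_model(model_name='117M', top_k=40, version=1, folder_id=None)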
 def __init__(self):
     # 1. Authenticate and create the PyDrive client.
     auth.authenticate_user()
     gauth = GoogleAuth()
     gauth.credentials = GoogleCredentials.get_application_default()
     self.drive = GoogleDrive(gauth)
Example #4
 def __init__(self):
     auth.authenticate_user()
     self.gauth = GoogleAuth()
     self.gauth.credentials = GoogleCredentials.get_application_default()
     self.drive = GoogleDrive(self.gauth)
    def mount(self):
        auth.authenticate_user()
        drive.mount('/content/gdrive/')
        baseDir = '/content/gdrive/My Drive/Colab/'

        return baseDir
Example #6
# local environment.
#
# 6. Enter the path to your service account key as the
# `GOOGLE_APPLICATION_CREDENTIALS` variable in the cell below and run the cell.

#%%
import sys

# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your GCP account. This provides access to your
# Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

if 'google.colab' in sys.modules:
    from google.colab import auth as google_auth
    google_auth.authenticate_user()

# If you are running this notebook locally, replace the string below with the
# path to your service account key and run this cell to authenticate your GCP
# account.
else:
    get_ipython().run_line_magic('env', "GOOGLE_APPLICATION_CREDENTIALS ''")

#%% [markdown]
# ### Create a Cloud Storage bucket
#
# **The following steps are required, regardless of your notebook environment.**
#
# When you submit a training job using the Cloud SDK, you upload a Python package
# containing your training code to a Cloud Storage bucket. AI Platform runs
# the code from this package. In this tutorial, AI Platform also saves the
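
#%% [markdown]
# A hedged sketch (not part of the original notebook): creating such a bucket with
# the `google-cloud-storage` Python client. The bucket name and region below are
# placeholders, not values from this tutorial.

#%%
from google.cloud import storage

BUCKET_NAME = 'your-unique-bucket-name'  # placeholder: bucket names must be globally unique
REGION = 'us-central1'                   # placeholder region

storage_client = storage.Client()
bucket = storage_client.create_bucket(BUCKET_NAME, location=REGION)
print('Created bucket {} in {}'.format(bucket.name, bucket.location))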
Example #7
def train(args):
    #  config_path()
    outpath = args.out_path
    log_dir = os.path.join(outpath, 'logs')
    model_dir = os.path.join(outpath, 'model')
    sample_dir = os.path.join(outpath, 'sample')
    dirs = [log_dir, model_dir, sample_dir]
    for dir_ in dirs:
        if not os.path.exists(dir_):
            os.makedirs(dir_)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    sess = tf.Session(config=config)
    device = '/gpu:0'
    if args.gpu == 1:
        device = '/gpu:1'
    with tf.device(device):
        if args.net == 'vanilla':
            net = RoomnetVanilla()
        if args.net == 'rcnn':
            net = RcnnNet()
        if args.net == 'classify':
            net = ClassifyNet()
        net.build_model()
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    if args.train == 0:
        print('train from scratch')
        start_step = 0
        # start_epoch=0
    else:
        start_step = net.restore_model(sess, model_dir)

    train_writer = tf.summary.FileWriter(log_dir, sess.graph)
    start_time = time.time()
    fetchworker = BatchFetcher(datadir, True, True)
    fetchworker.start()
    fetchworker2 = BatchFetcher(val_datadir, False, True)
    fetchworker2.start()
    step_per_epoch = fetchworker.get_max_step()
    fout = open(os.path.join(outpath, 'acc.txt'), 'a')
    if 1:
        #for epo in range(start_epoch,max_epoch+1):
        print('total training steps', max_epoch * step_per_epoch + 1)
        for i in range(start_step, max_epoch * step_per_epoch + 1):
            im_in, lay_gt, label_gt, names = fetchworker.fetch()
            net.set_feed(im_in, lay_gt, label_gt, i)
            net.run_optim(sess)
            net.step_assign(sess, i)
            global_step = i
            # net.step_plus(sess)
            # _,global_step=net.run_step()
            if np.mod(global_step, 10) == 0:
                summ_str = net.run_sum(sess)
                train_writer.add_summary(summ_str, global_step)
                im_in, lay_gt, label_gt, names = fetchworker2.fetch()
                net.set_feed(im_in, lay_gt, label_gt, i)
                pred_class, pred_lay = net.run_result(sess)
                c_out = np.argmax(pred_class, axis=1)
                c_gt = np.argmax(label_gt, axis=1)
                acc = np.mean(np.array(np.equal(c_out, c_gt), np.float32))
                print('accuracy', acc)
                fout.write('%s %s\n' % (i, acc))
            if np.mod(global_step, 100) == 0:
                net.save_model(sess, model_dir, global_step)
            if np.mod(global_step, 100) == 0:
                im_in, lay_gt, label_gt, names = fetchworker2.fetch()
                net.set_feed(im_in, lay_gt, label_gt, i)
                pred_class, pred_lay = net.run_result(sess)
                #        try:
                #          save_results(im_in, lay_gt, label_gt, names, pred_lay, pred_class, sample_dir, global_step)
                #        except:
                np.savez(os.path.join(sample_dir, '%s.npz' % (i)),
                         im=im_in,
                         gt_lay=lay_gt,
                         gt_label=label_gt,
                         names=names,
                         pred_lay=pred_lay,
                         pred_class=pred_class)
            if np.mod(global_step, 1000) == 0:
                auth.authenticate_user()
                gauth = GoogleAuth()
                gauth.credentials = GoogleCredentials.get_application_default()
                drive = GoogleDrive(gauth)
                strg = str(global_step)
                fl = [
                    'model' + strg + '-' + strg + '.data-00000-of-00001',
                    'model' + strg + '-' + strg + '.meta',
                    'model' + strg + '-' + strg + '.index'
                ]
                for i in range(3):
                    uploaded = drive.CreateFile({'title': fl[i]})
                    uploaded.SetContentFile(
                        '/content/path-to-output-train/model/' + fl[i])
                    uploaded.Upload()
                    print('Uploaded file :{}'.format(fl[i]))
            print('[step: %d] [time: %s]' % (i, time.time() - start_time))
            net.print_loss_acc(sess)
    fetchworker.shutdown()
    fetchworker2.shutdown()
Example #8
def get_qcs_objects_for_notebook(
    project_id: Optional[str] = None,
    processor_id: Optional[str] = None
) -> QCSObjectsForNotebook:  # pragma: nocover
    """Authenticates on Google Cloud, can return a Device and Simulator.

    Args:
        project_id: Optional explicit Google Cloud project id. Otherwise,
            this defaults to the environment variable GOOGLE_CLOUD_PROJECT.
            By using an environment variable, you can avoid hard-coding
            personal project IDs in shared code.
        processor_id: Engine processor ID (from Cloud console or
            ``Engine.list_processors``).

    Returns:
        An instance of QCSObjectsForNotebook.
    """

    # Check for Google Application Default Credentials and run
    # interactive login if the notebook is executed in Colab. In
    # case the notebook is executed in Jupyter notebook or other
    # IPython runtimes, no interactive login is provided, it is
    # assumed that the `GOOGLE_APPLICATION_CREDENTIALS` env var is
    # set or `gcloud auth application-default login` was executed
    # already. For more information on using Application Default Credentials
    # see https://cloud.google.com/docs/authentication/production
    try:
        from google.colab import auth
    except ImportError:
        print(
            "Not running in a colab kernel. Will use Application Default Credentials."
        )
    else:
        print("Getting OAuth2 credentials.")
        print("Press enter after entering the verification code.")
        try:
            auth.authenticate_user(clear_output=False)
            print("Authentication complete.")
        except Exception as exc:
            print(f"Authentication failed: {exc}")

    # Attempt to connect to the Quantum Engine API, and use a simulator if unable to connect.
    sampler: Union[PhasedFSimEngineSimulator, QuantumEngineSampler]
    try:
        engine = get_engine(project_id)
        if processor_id:
            processor = engine.get_processor(processor_id)
        else:
            processors = engine.list_processors()
            if not processors:
                raise ValueError("No processors available.")
            processor = processors[0]
            print(
                f"Available processors: {[p.processor_id for p in processors]}"
            )
            print(f"Using processor: {processor.processor_id}")
        device = processor.get_device()
        sampler = processor.get_sampler()
        signed_in = True
    except Exception as exc:
        print(f"Unable to connect to quantum engine: {exc}")
        print("Using a noisy simulator.")
        sampler = PhasedFSimEngineSimulator.create_with_random_gaussian_sqrt_iswap(
            mean=SQRT_ISWAP_INV_PARAMETERS,
            sigma=PhasedFSimCharacterization(theta=0.01,
                                             zeta=0.10,
                                             chi=0.01,
                                             gamma=0.10,
                                             phi=0.02),
        )
        device = Sycamore
        signed_in = False

    return QCSObjectsForNotebook(device=device,
                                 sampler=sampler,
                                 signed_in=signed_in)
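A short usage sketch for the helper above (hedged: the call falls back to the noisy simulator when no engine is reachable; the attributes used are the `device`, `sampler` and `signed_in` fields returned by the code):

# Hypothetical call with no explicit project/processor ids.
qcs = get_qcs_objects_for_notebook(project_id=None, processor_id=None)
print("Signed in:", qcs.signed_in)
sampler = qcs.sampler  # engine-backed sampler or simulator
device = qcs.device    # device to validate circuits against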
Example #9
  def on_epoch_end(self, epoch, logs={}):  
    train_predictions = model.predict(X_train)
    y_train_category=[np.argmax(v) for v in Y_train]
    y_train_predict_category=[np.argmax(v) for v in train_predictions]
    
    test_predictions = model.predict(X_test)
    y_test_category=[np.argmax(v) for v in Y_test]
    y_test_predict_category=[np.argmax(v) for v in test_predictions]
    
    # ------
    
    train_acc = accuracy_score(
        y_train_category, y_train_predict_category)
    
    train_precision = precision_score(
        y_train_category, y_train_predict_category, average='micro')
    
    train_recall = recall_score(
        y_train_category, y_train_predict_category, average='micro')
    
    train_f1 = f1_score(
        y_train_category, y_train_predict_category, average='micro')
    
    test_acc = accuracy_score(
        y_test_category, y_test_predict_category)
    
    test_precision = precision_score(
        y_test_category, y_test_predict_category, average='micro')
    
    test_recall = recall_score(
        y_test_category, y_test_predict_category, average='micro')
    
    test_f1 = f1_score(
        y_test_category, y_test_predict_category, average='micro')
    
    # ------
    
    self.train_accs.append(train_acc)
    self.train_precisions.append(train_precision)
    self.train_recalls.append(train_recall)
    self.train_f1_scores.append(train_f1)

    self.test_accs.append(test_acc)
    self.test_precisions.append(test_precision)
    self.test_recalls.append(test_recall)
    self.test_f1_scores.append(test_f1)
    
    # ------
    
    print ('train_acc: %.4f\ttrain_precision: %.4f\t\ttrain_recall: %.4f\ttrain_f1_score: %.4f'
           % (train_acc, train_precision, train_recall, train_f1))

    print ('test_acc: %.4f\ttest_precision: %.4f\t\ttest_recall: %.4f\ttest_f1_score: %.4f'
           % (test_acc, test_precision, test_recall, test_f1))
    
    print ('----------------------------------------------------------')

# from imblearn.over_sampling import SMOTE
# from sklearn.utils import class_weight
# smote = SMOTE()

# X_train, Y_train = smote.fit_resample(X_train,Y_train)
# X_train.shape

# X_train =X_train.reshape(23940,22,9)

#@title Create the model


# class_weight = class_weight.compute_class_weight("balanced", np.unique(Y_train))

keras.backend.clear_session()
input_shape = X_train.shape[1:]
print(input_shape)


model = Sequential()


activation_Dense='softmax' 
units = 400
#LSTM MODEL



model.add(Bidirectional(LSTM(units=units,
                                  return_sequences=True,
                                 input_shape=input_shape)))

model.add(Dropout(rate=0.1))

model.add(Bidirectional(LSTM(units=units,
                                  return_sequences=True)))

model.add(Dropout(rate=0.1))

model.add(Bidirectional(LSTM(units=units,
                                  return_sequences=True)))

model.add(Dropout(rate=0.1))

model.add(Bidirectional(LSTM(units=units,
                                  return_sequences=True)))







model.add(Flatten())

model.add(Dense(12,activation=activation_Dense))


adam = optimizers.Adam(lr=0.001, 
                       beta_1=0.9, 
                       beta_2=0.999, 
                       epsilon=None, 
                       decay=0.0, 
                       amsgrad=False)

model.compile(loss = 'categorical_crossentropy',
              optimizer = adam)  # use the Adam optimizer configured above

metricsPrinter = MetricsPrinter()



import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

number_of_epochs = 10000 #35
earlyStopping = EarlyStopping(monitor="val_loss",patience = 5, 
                              verbose = 1)

mcp_save = ModelCheckpoint("/content/gdrive/My Drive/ChallengeUP-master/ChallengeUP-master/Data/3boost_test1.h5",
                           save_best_only = True
                           ,monitor = "val_loss", verbose =1)


model.fit(
  X_train,
  Y_train,
  epochs=number_of_epochs,
  batch_size=32, 
  verbose=1, 
  validation_split = 0.3,
  shuffle=True,
  callbacks=[metricsPrinter,earlyStopping,mcp_save]
)

plt.figure(figsize=(10,5), dpi=100)
plt.grid(True, axis='y', linestyle='--', linewidth=0.5)


plt.plot(metricsPrinter.train_f1_scores, 'bo-', linewidth=3)
plt.plot(metricsPrinter.test_f1_scores, 'ro-', linewidth=3)
plt.ylabel('Percentage')
plt.xlabel('Epoch')
plt.legend(['Train F1-score',
            'Validation F1-score'])

plt.show()

model.summary()

# load a saved model
from keras.models import load_model

model = load_model("/content/gdrive/My Drive/ChallengeUP-master/ChallengeUP-master/Data/3boost_test1.h5")

from sklearn.metrics import confusion_matrix

train_predictions = model.predict(X_train)
y_train_category=[np.argmax(v) for v in Y_train]
y_train_predict_category=[np.argmax(v) for v in train_predictions]
    
test_predictions = model.predict(X_test)
y_test_category=[np.argmax(v) for v in Y_test]
y_test_predict_category=[np.argmax(v) for v in test_predictions]


labels=[1,2,3,4,5,6,7,8,9,10,11,20]

cnf_matrix1 = confusion_matrix(y_train_category, y_train_predict_category)
confusion_ma1 = pd.DataFrame(cnf_matrix1, columns=labels, index=labels)

cnf_matrix2 = confusion_matrix(y_test_category, y_test_predict_category)
confusion_ma2 = pd.DataFrame(cnf_matrix2, columns=labels, index=labels)

#@title Read test set

base_test = pd.read_csv(path + 'CompleteDataSet_testing_competition.csv')

base_test['TimeStamps'] = pd.to_datetime(base_test['TimeStamps'])
base_test = base_test.iloc[1:] 
real_test_splited_per_second, higher_length_real_test = split_per_second(
    base_test,sensors,False)

#@title Generating the final results
X_real_test = completeData(X=real_test_splited_per_second,
                           final_len=higher_length)

print("Test size:",len(X_real_test))
X_real_test2 = np.asarray(X_real_test).astype("float32")
real_test_predictions = model.predict(X_real_test2)

y_predict_real=[np.argmax(t) for t in real_test_predictions]

y_predict_real = np.asarray(y_predict_real)+1

y_predict_real = [20 if prediction==12 else prediction for prediction 
                  in y_predict_real]

test_real = base_test['TimeStamps'].apply(
    lambda x: x.replace(microsecond=0))

test_real = test_real.drop_duplicates()

test_real = pd.to_datetime(test_real, 
                           format='%d-%b-%Y %H:%M:%S', 
                           utc=True)

print(test_real.shape[0],len(y_predict_real))

d = {'timestamp': test_real, 'target': y_predict_real}
final_results = pd.DataFrame(data=d)
final_results = final_results[['timestamp','target']]
print(final_results.head(100))

print(final_results.head())

#@title Submission File

!pip install -U -q PyDrive
from google.colab import auth
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.client import GoogleCredentials
import datetime

auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
driveWriter = GoogleDrive(gauth)

folder_id = '1El1JO73c9fdMmqnVkB-8dryE47oyoJXP'
file_title = 'test-results' + str(datetime.datetime.now()) + "3boost_test1" + '.csv' 

submission_file = driveWriter.CreateFile({
    'title': file_title,
    'parents': [{'kind': 'drive#fileLink', 'id': folder_id}]
})

submission_file.SetContentString(
    final_results.to_csv(
        index=False, columns=['timestamp', 'target']
    )
)

submission_file.Upload()
Example #10
def download(gdrive_id, file, credentials_path, block_size=500):
    """
    Download the file from Google Drive and write it to `file`

    Args:
        gdrive_id (str): Id of the file in gdrive
        file (pathlib.Path): file path where to write the downloaded contents
        credentials_path (str): path from where to retrieve/save the gdrive credentials
        block_size (int): Size of download chunks in MB
    """
    # If modifying these scopes, delete the file token.pickle.
    SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

    creds = None
    # The file token.pickle stores the user's access and refresh tokens
    token_pickle = os.path.join(credentials_path, 'token.pickle')
    if os.path.exists(token_pickle):
        with open(token_pickle, 'rb') as token:
            creds = pickle.load(token)

    # If there are no (valid) credentials available, let the user log in.
    creds_json = os.path.join(credentials_path, 'credentials.json')
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            try:
                from google.colab import auth
                auth.authenticate_user()
            except:
                flow = InstalledAppFlow.from_client_secrets_file(
                    creds_json, SCOPES)
                creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open(token_pickle, 'wb') as token:
            pickle.dump(creds, token)

    service = build('drive', 'v3', credentials=creds)

    # Request file metadata
    request = service.files().get_media(fileId=gdrive_id)

    # File to write the downloaded data
    fh = open(file, "wb")

    # Streamer and writer of the file
    downloader = MediaIoBaseDownload(fh,
                                     request,
                                     chunksize=1024 * 1024 * block_size)

    # progress bar
    bar = tqdm(
        desc=file.stem,
        total=100,
    )

    # Loop download chunks
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        bar.n = int(status.progress() * 100)
        bar.refresh()
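A hedged invocation sketch for the downloader above (the file id and paths are placeholders; a pathlib.Path is passed because the progress bar uses `file.stem`):

# Hypothetical usage: download a Drive file into ./data/dataset.zip in 500 MB chunks.
from pathlib import Path

download(gdrive_id='<your-gdrive-file-id>',
         file=Path('data/dataset.zip'),
         credentials_path='credentials',
         block_size=500)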
Example #11
 def __init__(self, creds=None):
     self.cred = creds or auth.authenticate_user()
     self.drive = build('drive', 'v3', credentials=self.cred)
Example #12
def getGoogleCloudBucket():
    from google.colab import auth
    auth.authenticate_user()
    return 'gs://medicalblockchain_dev'
Example #13
"""

model_final.save("faceid_big_rgbd_2.h5")

from google.colab import files

# Install the PyDrive wrapper & import libraries.
# This only needs to be done once in a notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Create & upload a file.
uploaded = drive.CreateFile({'title': 'faceid_big_rgbd.h5'})
uploaded.SetContentFile('faceid_big_rgbd.h5')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))

# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
Example #14
def export_as_gsheets(input_data, query="", title=None, verbose=True):
    """Save data to google sheets with one-line. 

    Works with raw JSON (from API), or even a Dataframe. 

    Parameters
    ----------
    input_data: JSON or DataFrame 
        The data to be uploaded
    query: str
        The DSL query - this is needed only when raw API JSON is passed
    title: str, optional 
        The spreadsheet title, if one wants to reuse an existing spreadsheet.
    verbose: bool, default=True
        Verbose mode

    Notes
    -----
    This method assumes that the calling environment can provide valid Google authentication credentials.
    There are two routes to make this work, depending on whether one is using Google Colab or a traditional Jupyter environment.

    **Google Colab**
    This is the easiest route. In Google Colab, all required libraries are already available. The `to_gsheets` method simply triggers the built-in authentication process via a pop up window. 
    
    **Jupyter**
    This route involves a few more steps. In Jupyter, it is necessary to install the ``gspread``, ``oauth2client`` and ``gspread_dataframe`` modules first. Secondly, one needs to create Google Drive access credentials using OAuth (which boils down to a JSON file). Note that the credentials file needs to be saved in: `~/.config/gspread/credentials.json` (for gspread to work correctly). 
    These steps are described at https://gspread.readthedocs.io/en/latest/oauth2.html#for-end-users-using-oauth-client-id.

    Returns
    -------
    str
        The google sheet URL as a string.   

    Example
    -------
    >>> import pandas as pd
    >>> from dimcli.utils import export_as_gsheets
    >>> cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
                 'Price': [22000,25000,27000,35000]
                 }
    >>> df = pd.DataFrame(cars, columns = ['Brand', 'Price'])
    >>> export_as_gsheets(df)
    ..authorizing with google..
    ..creating a google sheet..
    ..uploading..
    Saved:
    https://docs.google.com/spreadsheets/d/1tsyRFDEsADltWDdqjuyDWDOg81sl9hN3Nu8MXVlqDDI
    """

    if 'google.colab' in sys.modules:
        from google.colab import auth
        auth.authenticate_user()

        import gspread
        from gspread_dataframe import set_with_dataframe
        from oauth2client.client import GoogleCredentials
        gc = gspread.authorize(GoogleCredentials.get_application_default())

    else:
        try:
            import gspread
            from oauth2client.service_account import ServiceAccountCredentials
            from gspread_dataframe import set_with_dataframe
        except:
            raise Exception(
                "Missing libraries. Please install gspread, oauth2client and gspread_dataframe: `pip install gspread gspread_dataframe oauth2client -U`."
            )

        if verbose: click.secho("..authorizing with google..")
        try:
            gc = gspread.oauth()
        except:
            raise Exception(
                "Google authorization failed. Do you have all the required files? Please see the documentation for more information."
            )

    if type(input_data) == type({}):
        # JSON
        if not query:
            raise Exception(
                "When passing raw JSON you also have to provide the DSL query, which is needed to determine the primary records key."
            )
        return_object = line_search_return(query)
        try:
            df = json_normalize(input_data[return_object], errors="ignore")
        except:
            df = json_normalize(input_data, errors="ignore")

    elif type(input_data) == DataFrame:
        # Dataframe
        df = input_data

    else:
        raise Exception(f"Input type '{str(type(input_data))}' not supported.")

    if title:
        if verbose: click.secho(f"..opening google sheet with title: {title}")
        gsheet = gc.open(title)
    else:
        if verbose: click.secho("..creating a google sheet..")
        title = "dimcli-export-" + time.strftime("%Y%m%d-%H%M%S")
        gsheet = gc.create(title)

    worksheet = gsheet.sheet1
    click.secho("..uploading..")
    set_with_dataframe(worksheet, df)

    # https://gspread.readthedocs.io/en/latest/api.html#gspread.models.Spreadsheet.share
    gsheet.share(None, perm_type='anyone',
                 role='reader')  # anyone can see with url
    spreadsheet_url = "https://docs.google.com/spreadsheets/d/%s" % gsheet.id
    # if verbose: click.secho(f"Saved:\n{spreadsheet_url}", bold=True)
    return spreadsheet_url
Example #15
def _gradient_descent(objective,
                      p0,
                      it,
                      n_iter,
                      n_iter_check=1,
                      n_iter_without_progress=300,
                      momentum=0.8,
                      learning_rate=200.0,
                      min_gain=0.01,
                      min_grad_norm=1e-7,
                      verbose=0,
                      args=None,
                      kwargs=None):
    """Batch gradient descent with momentum and individual gains.

    Parameters
    ----------
    objective : function or callable
        Should return a tuple of cost and gradient for a given parameter
        vector. When expensive to compute, the cost can optionally
        be None and can be computed every n_iter_check steps using
        the objective_error function.

    p0 : array-like, shape (n_params,)
        Initial parameter vector.

    it : int
        Current number of iterations (this function will be called more than
        once during the optimization).

    n_iter : int
        Maximum number of gradient descent iterations.

    n_iter_check : int
        Number of iterations before evaluating the global error. If the error
        is sufficiently low, we abort the optimization.

    n_iter_without_progress : int, optional (default: 300)
        Maximum number of iterations without progress before we abort the
        optimization.

    momentum : float, within (0.0, 1.0), optional (default: 0.8)
        The momentum generates a weight for previous gradients that decays
        exponentially.

    learning_rate : float, optional (default: 200.0)
        The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If
        the learning rate is too high, the data may look like a 'ball' with any
        point approximately equidistant from its nearest neighbours. If the
        learning rate is too low, most points may look compressed in a dense
        cloud with few outliers.

    min_gain : float, optional (default: 0.01)
        Minimum individual gain for each parameter.

    min_grad_norm : float, optional (default: 1e-7)
        If the gradient norm is below this threshold, the optimization will
        be aborted.

    verbose : int, optional (default: 0)
        Verbosity level.

    args : sequence
        Arguments to pass to objective function.

    kwargs : dict
        Keyword arguments to pass to objective function.

    Returns
    -------
    p : array, shape (n_params,)
        Optimum parameters.

    error : float
        Optimum.

    i : int
        Last iteration.
    """
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}

    p = p0.copy().ravel()
    update = np.zeros_like(p)
    gains = np.ones_like(p)
    error = np.finfo(float).max
    best_error = np.finfo(float).max
    best_iter = i = it

    tic = time()
    for i in range(it, n_iter):

        if i % 50 == 0:
            print("checking credentials ~~")
            auth.authenticate_user()
            gauth = GoogleAuth()
            gauth.credentials = GoogleCredentials.get_application_default()
            preventExpire = drive.CreateFile()
            del preventExpire

        check_convergence = (i + 1) % n_iter_check == 0
        # only compute the error when needed
        kwargs['compute_error'] = check_convergence or i == n_iter - 1

        error, grad = objective(p, *args, **kwargs)
        grad_norm = linalg.norm(grad)

        inc = update * grad < 0.0
        dec = np.invert(inc)
        gains[inc] += 0.2
        gains[dec] *= 0.8
        np.clip(gains, min_gain, np.inf, out=gains)
        grad *= gains
        update = momentum * update - learning_rate * grad
        p += update

        if check_convergence:
            toc = time()
            duration = toc - tic
            tic = toc

            if verbose >= 2:
                print("[t-SNE] Iteration %d: error = %.7f,"
                      " gradient norm = %.7f"
                      " (%s iterations in %0.3fs) Yay!" %
                      (i + 1, error, grad_norm, n_iter_check, duration))

            if error < best_error:
                best_error = error
                best_iter = i
            elif i - best_iter > n_iter_without_progress:
                if verbose >= 2:
                    print("[t-SNE] Iteration %d: did not make any progress "
                          "during the last %d episodes. Finished." %
                          (i + 1, n_iter_without_progress))
                break
            if grad_norm <= min_grad_norm:
                if verbose >= 2:
                    print("[t-SNE] Iteration %d: gradient norm %f. Finished." %
                          (i + 1, grad_norm))
                break

    return p, error, i
Example #16
def getSpreadsheetAuth(): 
  auth.authenticate_user()
  spreadsheetAuth=gspread.authorize(GoogleCredentials.get_application_default())
  return spreadsheetAuth
Example #17
def bq_to_df(project_id, query):
    auth.authenticate_user()
    job_config = bigquery.QueryJobConfig()
    return bigquery.Client(project_id).query(
        query, job_config=job_config).to_dataframe()
Example #18
def drive_auth():
    auth.authenticate_user()
    gauth = GoogleAuth()
    gauth.credentials = GoogleCredentials.get_application_default()
    drive = GoogleDrive(gauth)
    return drive
Example #19
def google_authenticate():
    auth.authenticate_user()
    gc = gspread.authorize(GoogleCredentials.get_application_default())
    return gc
Example #20
def experiment(model_config):
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("SCRIPT START")

    if model_config["use_tpu"]:
        assert 'COLAB_TPU_ADDR' in os.environ, 'Missing TPU; did you request a TPU in Notebook Settings?'

    auth.authenticate_user()

    tf.logging.info("TPU resolver started")

    if 'COLAB_TPU_ADDR' in os.environ:
        TF_MASTER = 'grpc://{}'.format(os.environ['COLAB_TPU_ADDR'])

        # Upload credentials to TPU.
        with tf.Session(TF_MASTER) as sess:
            with open('/content/adc.json', 'r') as f:
                auth_info = json.load(f)
            tf.contrib.cloud.configure_gcs(sess, credentials=auth_info)
        # Now credentials are set for all future sessions on this TPU.
    else:
        TF_MASTER = ''

    # os.environ['PROJECT_NAME']='nnproj'
    # os.environ['PROJECT_ZONE']='boh'
    # os.environ['TPU_NAME']='bah'
    #
    # tpu_cluster_resolver = TPUClusterResolver(
    #     tpu=os.environ['TPU_NAME'],
    #     project=os.environ['PROJECT_NAME'],
    #     zone=os.environ['PROJECT_ZONE'])

    if model_config["use_tpu"]:
        config = tpu.RunConfig(
            # cluster=tpu_cluster_resolver,
            tf_random_seed=RANDOM_SEED,
            master=TF_MASTER,
            model_dir=model_config['model_base_dir'] + os.path.sep + str(model_config["experiment_id"]),
            save_checkpoints_steps=500,
            save_summary_steps=250,
            tpu_config=tpu.TPUConfig(
                iterations_per_loop=500,
                num_shards=8,
                per_host_input_for_training=tpu.InputPipelineConfig.PER_HOST_V1))  # pylint: disable=line-too-long
    else:
        config = tpu.RunConfig(
            # cluster=tpu_cluster_resolver,
            # model_dir=model_config['model_base_dir'] + os.path.sep + str(model_config["experiment_id"]),
            save_checkpoints_steps=500,
            save_summary_steps=250)  # pylint: disable=line-too-long

    tf.logging.info("Creating datasets")
    urmp_train, urmp_eval, urmp_test = [
        urmp_input.URMPInput(mode=mode,
                             data_dir=model_config['data_path'],
                             transpose_input=False,
                             use_bfloat16=model_config['use_bfloat16'])
        for mode in ['train', 'eval', 'test']
    ]

    tf.logging.info("Assigning TPUEstimator")
    # Optimize in a supervised fashion until validation loss worsens
    separator = tpu.TPUEstimator(
        use_tpu=model_config["use_tpu"],
        model_fn=unet_separator,
        config=config,
        train_batch_size=model_config['batch_size'],
        eval_batch_size=model_config['batch_size'],
        predict_batch_size=model_config['batch_size'],
        params={
            i: model_config[i]
            for i in model_config if (i != 'batch_size' and i != 'context')
        }  # TODO: context
    )

    if model_config['load_model']:
        tf.logging.info("Load the model")
        current_step = estimator._load_global_step_from_checkpoint_dir(
            model_config['model_base_dir'] + os.path.sep +
            str(model_config["experiment_id"]))

    if model_config['mode'] == 'train_and_eval':
        tf.logging.info("Train the model")
        # Should be an early stopping here, but it will come with tf 1.10
        separator.train(input_fn=urmp_train.input_fn,
                        steps=model_config['training_steps'])
        # ...zzz...
        tf.logging.info("Supervised training finished!")
        tf.logging.info("Evaluate model")
        # Evaluate the model.
        eval_result = separator.evaluate(
            input_fn=urmp_eval.input_fn,
            steps=model_config['evaluation_steps'])
        tf.logging.info('Evaluation results: %s' % eval_result)

    elif model_config['mode'] == 'predict':
        tf.logging.info("Test results and save predicted sources:")
        predictions = separator.predict(input_fn=urmp_test.input_fn)

        for prediction in predictions:
            Test.save_prediction(prediction,
                                 estimates_path=model_config["estimates_path"],
                                 sample_rate=model_config["expected_sr"])
        Utils.concat_and_upload(
            model_config["estimates_path"], model_config['model_base_dir'] +
            os.path.sep + str(model_config["experiment_id"]))
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--device',
                        default='0,1,2,3',
                        type=str,
                        required=False,
                        help='which GPUs to use')
    parser.add_argument('--model_config',
                        default='config/model_config_small.json',
                        type=str,
                        required=False,
                        help='model configuration file')
    parser.add_argument('--tokenizer_path',
                        default='cache/vocab_processed.txt',
                        type=str,
                        required=False,
                        help='vocabulary file')
    parser.add_argument('--raw_data_path',
                        default='data/train.json',
                        type=str,
                        required=False,
                        help='path to the raw training corpus')
    parser.add_argument('--tokenized_data_path',
                        default='data/tokenized/',
                        type=str,
                        required=False,
                        help='where to store the tokenized corpus')
    parser.add_argument('--raw', action='store_true', help='tokenize the raw data first')
    parser.add_argument('--epochs',
                        default=50,
                        type=int,
                        required=False,
                        help='number of training epochs')
    parser.add_argument('--batch_size',
                        default=1,
                        type=int,
                        required=False,
                        help='training batch size')
    parser.add_argument('--lr',
                        default=1.5e-4,
                        type=float,
                        required=False,
                        help='learning rate')
    parser.add_argument('--warmup_steps',
                        default=2000,
                        type=int,
                        required=False,
                        help='number of warm-up steps')
    parser.add_argument('--log_step',
                        default=1,
                        type=int,
                        required=False,
                        help='report the loss every this many steps')
    parser.add_argument('--stride',
                        default=768,
                        type=int,
                        required=False,
                        help='window stride used when sampling training data')
    parser.add_argument('--gradient_accumulation',
                        default=1,
                        type=int,
                        required=False,
                        help='gradient accumulation steps')
    parser.add_argument('--fp16', action='store_true', help='mixed-precision training')
    parser.add_argument('--fp16_opt_level',
                        default='O1',
                        type=str,
                        required=False)
    parser.add_argument('--max_grad_norm',
                        default=1.0,
                        type=float,
                        required=False)
    parser.add_argument('--num_pieces',
                        default=100,
                        type=int,
                        required=False,
                        help='how many pieces to split the training corpus into')
    parser.add_argument('--output_dir',
                        default='model/',
                        type=str,
                        required=False,
                        help='model output path')
    parser.add_argument('--pretrained_model',
                        default='',
                        type=str,
                        required=False,
                        help='path to the pretrained model to start training from')
    parser.add_argument('--segment', action='store_true', help='tokenize Chinese at the word level')
    parser.add_argument('--google_driver_save',
                        action='store_true',
                        help='save the model to Google Drive')

    args = parser.parse_args()
    print('args:\n' + args.__repr__())

    if args.segment:
        from tokenizations import tokenization_bert_word_level as tokenization_bert
    else:
        from tokenizations import tokenization_bert

    os.environ["CUDA_VISIBLE_DEVICES"] = args.device  # 此处设置程序使用哪些显卡
    model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(
        args.model_config)
    print('config:\n' + model_config.to_json_string())

    n_ctx = model_config.n_ctx
    full_tokenizer = tokenization_bert.BertTokenizer(
        vocab_file=args.tokenizer_path)
    #full_tokenizer.max_len = 999999
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('using device:', device)

    raw_data_path = args.raw_data_path
    tokenized_data_path = args.tokenized_data_path
    raw = args.raw  # whether to build the dataset from scratch
    epochs = args.epochs
    batch_size = args.batch_size
    lr = args.lr
    warmup_steps = args.warmup_steps
    log_step = args.log_step
    stride = args.stride
    gradient_accumulation = args.gradient_accumulation
    fp16 = args.fp16  # do not enable on GPUs that lack half-precision support
    fp16_opt_level = args.fp16_opt_level
    max_grad_norm = args.max_grad_norm
    num_pieces = args.num_pieces
    output_dir = args.output_dir

    if raw:
        print('building files')
        build_files(raw_data_path=raw_data_path,
                    tokenized_data_path=tokenized_data_path,
                    full_tokenizer=full_tokenizer,
                    num_pieces=num_pieces)
        print('files built')

    if not args.pretrained_model:
        model = transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
    else:
        model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(
            args.pretrained_model)
    model.train()
    model.to(device)
    multi_gpu = False
    full_len = 0
    print('calculating total steps')
    for i in tqdm(range(num_pieces)):
        with open(tokenized_data_path + 'tokenized_train_{}.txt'.format(i),
                  'r') as f:
            full_len += len([int(item) for item in f.read().strip().split()])
    total_steps = int(full_len / stride * epochs / batch_size /
                      gradient_accumulation)
    print('total steps = {}'.format(total_steps))

    optimizer = transformers.AdamW(model.parameters(),
                                   lr=lr,
                                   correct_bias=True)
    scheduler = transformers.WarmupLinearSchedule(optimizer,
                                                  warmup_steps=warmup_steps,
                                                  t_total=total_steps)
    if fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=fp16_opt_level)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = DataParallel(model)
        multi_gpu = True
    print('starting training')
    running_loss = 0
    for epoch in range(epochs):
        print('epoch {}'.format(epoch + 1))
        now = datetime.now()
        print('time: {}'.format(now))
        x = np.linspace(0, num_pieces - 1, num_pieces, dtype=np.int32)
        random.shuffle(x)
        piece_num = 0
        for i in x:
            with open(tokenized_data_path + 'tokenized_train_{}.txt'.format(i),
                      'r') as f:
                line = f.read().strip()
            tokens = line.split()
            tokens = [int(token) for token in tokens]
            start_point = 0
            samples = []
            while start_point < len(tokens) - n_ctx:
                samples.append(tokens[start_point:start_point + n_ctx])
                start_point += stride
            if start_point < len(tokens):
                samples.append(tokens[len(tokens) - n_ctx:])
            random.shuffle(samples)
            for step in range(len(samples) // batch_size):

                #  prepare data
                batch = samples[step * batch_size:(step + 1) * batch_size]
                batch_labels = []
                batch_inputs = []
                for ids in batch:
                    int_ids_for_labels = [int(x) for x in ids]
                    int_ids_for_inputs = [int(x) for x in ids]
                    batch_labels.append(int_ids_for_labels)
                    batch_inputs.append(int_ids_for_inputs)
                batch_labels = torch.tensor(batch_labels).long().to(device)
                batch_inputs = torch.tensor(batch_inputs).long().to(device)

                #  forward pass
                outputs = model.forward(input_ids=batch_inputs,
                                        labels=batch_labels)
                loss, logits = outputs[:2]

                #  get loss
                if multi_gpu:
                    loss = loss.mean()
                if gradient_accumulation > 1:
                    loss = loss / gradient_accumulation

                #  loss backward
                if fp16:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                        torch.nn.utils.clip_grad_norm_(
                            amp.master_params(optimizer), max_grad_norm)
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   max_grad_norm)

                #  optimizer step
                if (step + 1) % gradient_accumulation == 0:
                    running_loss += loss.item()
                    optimizer.step()
                    optimizer.zero_grad()
                    scheduler.step()
                if (step + 1) % log_step == 0:
                    print(
                        'now time: {}:{}. Step {} of piece {} of epoch {}, loss {}'
                        .format(datetime.now().hour,
                                datetime.now().minute,
                                (step + 1) // gradient_accumulation, piece_num,
                                epoch + 1, running_loss / log_step))
                    running_loss = 0
            piece_num += 1

        print('saving model for epoch {}'.format(epoch + 1))
        if not os.path.exists(output_dir + 'model_epoch{}'.format(epoch + 1)):
            os.mkdir(output_dir + 'model_epoch{}'.format(epoch + 1))
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained(output_dir +
                                      'model_epoch{}'.format(epoch + 1))
        # torch.save(scheduler.state_dict(), output_dir + 'model_epoch{}/scheduler.pt'.format(epoch + 1))
        # torch.save(optimizer.state_dict(), output_dir + 'model_epoch{}/optimizer.pt'.format(epoch + 1))
        print('epoch {} finished'.format(epoch + 1))

        then = datetime.now()
        print('time: {}'.format(then))
        print('time for one epoch: {}'.format(then - now))

    print('training finished')
    if not os.path.exists(output_dir + 'final_model'):
        os.mkdir(output_dir + 'final_model')
    model_to_save = model.module if hasattr(model, 'module') else model
    model_to_save.save_pretrained(output_dir + 'final_model')
    # torch.save(scheduler.state_dict(), output_dir + 'final_model/scheduler.pt')
    # torch.save(optimizer.state_dict(), output_dir + 'final_model/optimizer.pt')

    if args.google_driver_save:
        # Import PyDrive and associated libraries.
        # This only needs to be done once in a notebook.
        from pydrive.auth import GoogleAuth
        from pydrive.drive import GoogleDrive
        from google.colab import auth
        from oauth2client.client import GoogleCredentials

        # Authenticate and create the PyDrive client.
        # This only needs to be done once in a notebook.
        auth.authenticate_user()
        gauth = GoogleAuth()
        gauth.credentials = GoogleCredentials.get_application_default()
        drive = GoogleDrive(gauth)

        # Create & upload a text file.
        uploaded = drive.CreateFile({"title": "config.json"})
        # Read file and set it as a content of this instance.
        uploaded.SetContentFile(
            '/content/GPT-2-Train/model/final_model/config.json')
        uploaded.Upload()  # Upload the file.
        print('Uploaded file with ID {}'.format(uploaded.get('id')))

        uploaded = drive.CreateFile({"title": "pytorch_model.bin"})
        # Read file and set it as a content of this instance.
        uploaded.SetContentFile(
            '/content/GPT-2-Train/model/final_model/pytorch_model.bin')
        uploaded.Upload()  # Upload the file.
        print('Uploaded file with ID {}'.format(uploaded.get('id')))
Example #22
def get_qcs_objects_for_notebook(
        project_id: Optional[str] = None,
        processor_id: Optional[str] = None) -> QCSObjectsForNotebook:
    """Authenticates on Google Cloud, can return a Device and Simulator.

    Args:
        project_id: Optional explicit Google Cloud project id. Otherwise,
            this defaults to the environment variable GOOGLE_CLOUD_PROJECT.
            By using an environment variable, you can avoid hard-coding
            personal project IDs in shared code.
        processor_id: Engine processor ID (from Cloud console or
            ``Engine.list_processors``).

    Returns:
        An instance of QCSObjectsForNotebook.
    """

    # Converting empty strings to None for form field inputs
    if project_id == "":
        project_id = None
    if processor_id == "":
        processor_id = None

    google_cloud_signin_failed: bool = False
    if project_id is None:
        if 'GOOGLE_CLOUD_PROJECT' not in os.environ:
            print(
                "No project_id provided and environment variable GOOGLE_CLOUD_PROJECT not set."
            )
            google_cloud_signin_failed = True
    else:  # pragma: no cover
        os.environ['GOOGLE_CLOUD_PROJECT'] = project_id

        # Following code runs the user through the Colab OAuth process.

        # Checks for Google Application Default Credentials and runs
        # interactive login if the notebook is executed in Colab. In
        # case the notebook is executed in Jupyter notebook or other
        # IPython runtimes, no interactive login is provided, it is
        # assumed that the `GOOGLE_APPLICATION_CREDENTIALS` env var is
        # set or `gcloud auth application-default login` was executed
        # already. For more information on using Application Default Credentials
        # see https://cloud.google.com/docs/authentication/production

        in_colab = False
        try:
            from IPython import get_ipython

            in_colab = 'google.colab' in str(get_ipython())

            if in_colab:
                from google.colab import auth

                print("Getting OAuth2 credentials.")
                print("Press enter after entering the verification code.")
                auth.authenticate_user(clear_output=False)
                print("Authentication complete.")
            else:
                print("Notebook isn't executed with Colab, assuming "
                      "Application Default Credentials are setup.")
        except:
            pass

        # End of Google Colab Authentication segment

    device: cirq.Device
    sampler: Union[PhasedFSimEngineSimulator, QuantumEngineSampler]
    if google_cloud_signin_failed or processor_id is None:
        print("Using a noisy simulator.")
        sampler = PhasedFSimEngineSimulator.create_with_random_gaussian_sqrt_iswap(
            mean=SQRT_ISWAP_INV_PARAMETERS,
            sigma=PhasedFSimCharacterization(theta=0.01,
                                             zeta=0.10,
                                             chi=0.01,
                                             gamma=0.10,
                                             phi=0.02),
        )
        device = Sycamore
    else:  # pragma: no cover
        device = get_engine_device(processor_id)
        sampler = get_engine_sampler(processor_id, gate_set_name="sqrt_iswap")
    return QCSObjectsForNotebook(
        device=device,
        sampler=sampler,
        signed_in=not google_cloud_signin_failed,
    )
Example #23
def auth():
    # noinspection PyUnresolvedReferences
    from google.colab import auth
    auth.authenticate_user()
Example #24
    def __init__(self):
        auth.authenticate_user()
        # prompt the user to access his Google Drive via the API

        self.drive_service = build('drive', 'v3')
        self.default_folder = self.find_items('Colab Notebooks')[0]
Example #25
def load_data_path(folder_id, colab_path='/root/data/', local_path='../data/',
                   mime_types=['csv', 'zip']):
    """Boilerplate to download data from Google Drive into Colab
    notebook or to point to local data folder

    Behavior:
    ---------
    1. Identify if Notebook is running in Colab
    2. If Yes, then
        a. do Google OAuth login (requires user interaction)
        b. create a data folder in Colab (colab_path)
        c. Search for all CSV files in Google Drive folder
        d. Copy all CSV files from G Drive into colab_path folder
        e. Return the colab_path variable
    3. If No, then
        a. Return the local_path variable

    Example 1:
    ----------
        !pip install colabtweak
        from colabtweak import load_data_path
        folder_id = "kasdhkfhjkashfjadskjfjsalk"
        data_path = load_data_path(folder_id)

        import pandas as pd
        df = pd.read_csv(data_path + "train.csv")
        df.head()

    Example 2:
    ----------
        !pip install colabtweak
        from colabtweak import load_data_path
        folder_id = "kasdhkfhjkashfjadskjfjsalk"
        colab_path = "/root/somecustomfolderincolab/"
        local_path = "../localsiblingprojectfolder/
        data_path = load_data_path(
            folder_id, colab_path=colab_path, local_path=local_path)

    """

    if 'google.colab' in sys.modules:
        print("Notebook is running in Colab")

        if folder_id is None:
            print((
                "Folder ID is missing.\n"
                "Click on the Google Drive folder and check your URL\n"
                "'https://drive.google.com/drive/u/0/folders/<folder_id>'"))

        # Login
        from google.colab import auth
        auth.authenticate_user()
        gauth = GoogleAuth()
        gauth.credentials = GoogleCredentials.get_application_default()
        drive = GoogleDrive(gauth)

        # create "~/data" folder within the Colab image
        download_path = os.path.expanduser(colab_path)
        try:
            os.makedirs(download_path)
        except FileExistsError:
            pass

        # Build a Drive query for files in the folder whose title contains one
        # of the given extensions (note: this filters by file-name extension,
        # not by actual MIME type, despite the parameter name)
        tmp = ' or '.join(["title contains '." + m + "'" for m in mime_types])
        querystr = "(" + tmp + ") and '" + folder_id + "' in parents"
        listed = drive.ListFile({'q': querystr}).GetList()

        # Copy all files
        for file in listed:
            try:
                print('{} {}'.format(file['id'], file['title']))
                output_file = os.path.join(download_path, file['title'])
                temp_file = drive.CreateFile({'id': file['id']})
                temp_file.GetContentFile(output_file)
            except Exception as e:
                print(e)

        # Set directory path
        return colab_path

    else:
        print("Notebook is running in Jupyter")
        return local_path
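For reference, a small worked example of the query string the function builds above, using a hypothetical folder id (illustrative only):

# Hypothetical folder id, default mime_types
mime_types = ['csv', 'zip']
folder_id = 'abc123'
tmp = ' or '.join(["title contains '." + m + "'" for m in mime_types])
querystr = "(" + tmp + ") and '" + folder_id + "' in parents"
assert querystr == ("(title contains '.csv' or title contains '.zip') "
                    "and 'abc123' in parents")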
Example #26
0
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()

!kill -9 -1  # kills all processes to force a Colab runtime restart

from google.colab import drive
drive.mount('/content/drive')

# Code to read csv file into colaboratory:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

downloaded = drive.CreateFile({'id':'1oF7toJFWt-tox50GM8I2AT_fvYITkgzZ'}) # replace the id with id of file you want to access
downloaded.GetContentFile('Data_namechanged.pkl')

# Commented out IPython magic to ensure Python compatibility.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline
import math
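A possible follow-up to the snippet above, assuming the downloaded pickle holds a pandas DataFrame (that is an assumption; only the file name comes from the snippet):

# Load the pickle fetched from Drive above (assumes it contains a DataFrame)
df = pd.read_pickle('Data_namechanged.pkl')
df.head()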
Example #27
0
def dqn_learing(env,
                q_func,
                optimizer_spec,
                exploration,
                stopping_criterion=None,
                replay_buffer_size=1000000,
                batch_size=32,
                gamma=0.99,
                learning_starts=50000,
                learning_freq=4,
                frame_history_len=4,
                target_update_freq=10000):
    """Run Deep Q-learning algorithm.

    You can specify your own convnet using q_func.

    All schedules are w.r.t. total number of steps taken in the environment.

    Parameters
    ----------
    env: gym.Env
        gym environment to train on.
    q_func: function
        Model to use for computing the q function. It should accept the
        following named arguments:
            input_channel: int
                number of channel of input.
            num_actions: int
                number of actions
    optimizer_spec: OptimizerSpec
        Specifying the constructor and kwargs, as well as learning rate schedule
        for the optimizer
    exploration: Schedule (defined in utils.schedule)
        schedule for probability of choosing a random action.
    stopping_criterion: (env) -> bool
        should return true when it's ok for the RL algorithm to stop.
        takes in env and the number of steps executed so far.
    replay_buffer_size: int
        How many memories to store in the replay buffer.
    batch_size: int
        How many transitions to sample each time experience is replayed.
    gamma: float
        Discount Factor
    learning_starts: int
        After how many environment steps to start replaying experiences
    learning_freq: int
        How many steps of environment to take between every experience replay
    frame_history_len: int
        How many past frames to include as input to the model.
    target_update_freq: int
        How many experience replay rounds (not steps!) to perform between
        each update to the target Q network
    """
    assert type(env.observation_space) == gym.spaces.Box
    assert type(env.action_space) == gym.spaces.Discrete
    Statistic['parameters'] = {
        'replay_buffer_size': replay_buffer_size,
        'batch_size': batch_size,
        'gamma': gamma,
        'frame_history_len': frame_history_len,
        'learning_starts': learning_starts,
        'learning_freq': learning_freq,
        'target_update_freq': target_update_freq,
        'name': env.env.unwrapped.spec.id
    }
    ###############
    # BUILD MODEL #
    ###############

    if len(env.observation_space.shape) == 1:
        # This means we are running on low-dimensional observations (e.g. RAM)
        input_arg = env.observation_space.shape[0]
    else:
        img_h, img_w, img_c = env.observation_space.shape
        input_arg = frame_history_len * img_c
    num_actions = env.action_space.n

    # Construct an epsilon-greedy policy with the given exploration schedule
    def select_epilson_greedy_action(model, obs, t):
        sample = random.random()
        eps_threshold = exploration.value(t)
        if sample > eps_threshold:
            obs = torch.from_numpy(obs).type(dtype).unsqueeze(0) / 255.0
            # Inference only: run under torch.no_grad() so no history is saved
            with torch.no_grad():
                return model(Variable(obs)).data.max(1)[1].cpu()
        else:
            return torch.IntTensor([[random.randrange(num_actions)]])

    # Initialize target q function and q function, i.e. build the model.
    ######

    Q = q_func(input_arg, num_actions).type(dtype)
    target_Q = q_func(input_arg, num_actions).type(dtype)

    if USE_CUDA:
        Q = Q.cuda()
        target_Q = target_Q.cuda()

    ######

    # Construct Q network optimizer function
    optimizer = optimizer_spec.constructor(Q.parameters(),
                                           **optimizer_spec.kwargs)

    # Construct the replay buffer
    replay_buffer = ReplayBuffer(replay_buffer_size, frame_history_len)

    ###############
    # RUN ENV     #
    ###############
    num_param_updates = 0
    mean_episode_reward = -float('nan')
    best_mean_episode_reward = -float('inf')
    last_obs = env.reset()
    LOG_EVERY_N_STEPS = 10000
    filename = 'statistics.pkl'

    # Google Drive
    try:
        import google.colab
        IN_COLAB = True
    except ImportError:
        IN_COLAB = False

    if IN_COLAB:
        run_in_colab_message()
        try:
            from google.colab import auth
            import logging
            from pydrive.auth import GoogleAuth
            from pydrive.drive import GoogleDrive
            from oauth2client.client import GoogleCredentials
            logging.getLogger('googleapiclient.discovery_cache').setLevel(
                logging.ERROR)
            auth.authenticate_user()
            gauth = GoogleAuth()
            gauth.credentials = GoogleCredentials.get_application_default()
            drive = GoogleDrive(gauth)
        except Exception:
            # Drive upload is optional; continue without it if auth fails.
            pass

    iter_time = time()

    for t in count():
        ### 1. Check stopping criterion
        if stopping_criterion is not None and stopping_criterion(env):
            break

        ### 2. Step the env and store the transition
        # At this point, "last_obs" contains the latest observation that was
        # recorded from the simulator. Here, your code needs to store this
        # observation and its outcome (reward, next observation, etc.) into
        # the replay buffer while stepping the simulator forward one step.
        # At the end of this block of code, the simulator should have been
        # advanced one step, and the replay buffer should contain one more
        # transition.
        # Specifically, last_obs must point to the new latest observation.
        # Useful functions you'll need to call:
        # obs, reward, done, info = env.step(action)
        # this steps the environment forward one step
        # obs = env.reset()
        # this resets the environment if you reached an episode boundary.
        # Don't forget to call env.reset() to get a new observation if done
        # is true!!
        # Note that you cannot use "last_obs" directly as input
        # into your network, since it needs to be processed to include context
        # from previous frames. You should check out the replay buffer
        # implementation in dqn_utils.py to see what functionality the replay
        # buffer exposes. The replay buffer has a function called
        # encode_recent_observation that will take the latest observation
        # that you pushed into the buffer and compute the corresponding
        # input that should be given to a Q network by appending some
        # previous frames.
        # Don't forget to include epsilon greedy exploration!
        # And remember that the first time you enter this loop, the model
        # may not yet have been initialized (but of course, the first step
        # might as well be random, since you haven't trained your net...)
        #####

        idx = replay_buffer.store_frame(last_obs)
        enc_obs = replay_buffer.encode_recent_observation()

        if t > learning_starts:
            action = select_epilson_greedy_action(Q, enc_obs, t)
        else:
            action = torch.IntTensor([[random.randrange(num_actions)]])

        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()

        replay_buffer.store_effect(idx, action, reward, done)

        last_obs = obs

        #####

        # at this point, the environment should have been advanced one step (and
        # reset if done was true), and last_obs should point to the new latest
        # observation

        ### 3. Perform experience replay and train the network.
        # Note that this is only done if the replay buffer contains enough samples
        # for us to learn something useful -- until then, the model will not be
        # initialized and random actions should be taken
        if (t > learning_starts and t % learning_freq == 0
                and replay_buffer.can_sample(batch_size)):
            # Here, you should perform training. Training consists of four steps:
            # 3.a: use the replay buffer to sample a batch of transitions (see the
            # replay buffer code for function definition, each batch that you sample
            # should consist of current observations, current actions, rewards,
            # next observations, and done indicator).
            # Note: Move the variables to the GPU if available
            # 3.b: fill in your own code to compute the Bellman error. This requires
            # evaluating the current and next Q-values and constructing the corresponding error.
            # Note: don't forget to clip the error between [-1,1], multiply it by -1 (since pytorch minimizes) and
            #       mask out post-terminal Q-values (see ReplayBuffer code).
            # 3.c: train the model. To do this, use the Bellman error you calculated previously.
            # Pytorch will differentiate this error for you, to backward the error use the following API:
            #       current.backward(d_error.data.unsqueeze(1))
            # Where "current" is the variable holding current Q Values and d_error is the clipped bellman error.
            # Your code should produce one scalar-valued tensor.
            # Note: don't forget to call optimizer.zero_grad() before the backward call and
            #       optimizer.step() after the backward call.
            # 3.d: periodically update the target network by loading the current Q network weights into the
            #      target_Q network. see state_dict() and load_state_dict() methods.
            #      you should update every target_update_freq steps, and you may find the
            #      variable num_param_updates useful for this (it was initialized to 0)
            #####

            #3.a
            obs_batch, act_batch, rew_batch, next_obs_batch, done_mask = replay_buffer.sample(
                batch_size)
            obs_batch = Variable(torch.from_numpy(obs_batch).type(dtype) /
                                 255.,
                                 requires_grad=True)
            act_batch = Variable(torch.from_numpy(act_batch).type(torch.int64))
            rew_batch = Variable(torch.from_numpy(rew_batch).type(dtype),
                                 requires_grad=True)
            next_obs_batch = Variable(
                torch.from_numpy(next_obs_batch).type(dtype) / 255.,
                requires_grad=True)
            done_mask = Variable(torch.from_numpy(done_mask).type(torch.int64))

            if USE_CUDA:
                obs_batch = obs_batch.cuda()
                act_batch = act_batch.cuda()
                rew_batch = rew_batch.cuda()
                next_obs_batch = next_obs_batch.cuda()
                done_mask = done_mask.cuda()

            # Q network
            val = Q(obs_batch).gather(dim=1, index=act_batch.unsqueeze(1))

            # Q target network: Bellman target r + gamma * (1 - done) * max_a' Q_target(s', a')
            with torch.no_grad():
                tar_val_t = target_Q(next_obs_batch).max(1)[0]
            tar_val = torch.addcmul(rew_batch, gamma,
                                    1 - done_mask.type(dtype), tar_val_t)

            # 3.b error calculate
            d_error = (tar_val - val.squeeze()).clamp_(-1, 1) * -1.
            # d_error = torch.pow((tar_val - val.squeeze()).clamp_(-1, 1), 2) * -1.

            # 3.c train Q network
            optimizer.zero_grad()
            val.backward(d_error.data.unsqueeze(1))
            optimizer.step()

            # 3.d update target network
            num_param_updates += 1
            if num_param_updates % target_update_freq == 0:
                target_Q.load_state_dict(Q.state_dict())
            #####

        ### 4. Log progress and keep track of statistics
        episode_rewards = get_wrapper_by_name(env,
                                              "Monitor").get_episode_rewards()
        if len(episode_rewards) > 0:
            mean_episode_reward = np.mean(episode_rewards[-100:])
        if len(episode_rewards) > 100:
            best_mean_episode_reward = max(best_mean_episode_reward,
                                           mean_episode_reward)

        Statistic["mean_episode_rewards"].append(mean_episode_reward)
        Statistic["best_mean_episode_rewards"].append(best_mean_episode_reward)

        if t % LOG_EVERY_N_STEPS == 0 and t > learning_starts:
            print("Timestep %d" % (t, ))
            print(f"Iteration time:{time()-iter_time:.2f}")
            iter_time = time()
            print("mean reward (100 episodes) %f" % mean_episode_reward)
            print("best mean reward %f" % best_mean_episode_reward)
            print("episodes %d" % len(episode_rewards))
            print("exploration %f" % exploration.value(t))
            sys.stdout.flush()

            # Dump statistics to pickle
            filename = f"{t}" + 'statistics.pkl' if IN_COLAB else 'statistics.pkl'
            with open(filename, 'wb') as f:
                pickle.dump(Statistic, f)
                print("Saved to %s" % filename)
            if IN_COLAB and t % (LOG_EVERY_N_STEPS * 10) == 0:
                try:
                    stat_pkl = drive.CreateFile()
                    stat_pkl.SetContentFile(filename)
                    stat_pkl.Upload()
                    print("Uploaded to drive")
                except Exception:
                    print("Exception during upload to drive")