parser.add_argument(
    '--out_dir',
    type=str,
    default="",
    help="Directory in which to save the model's state dict")
parser.add_argument('--k', type=int, default=1, help="k for k-mers")
parser.add_argument('--l2_penalty', type=float, default=0.0005)
parser.add_argument('--save_every',
                    type=int,
                    default=20,
                    help="Number of iterations between model checkpoints")
opt = parser.parse_args()
print(opt)
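
# Example invocation (illustrative only; the script name "train.py" and the
# flag values are assumptions, not from the original):
#   python train.py --out_dir models/ --k 3 --l2_penalty 0.0005 --save_every 20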

human_sequences, _ = dp.load_FASTA(opt.main_data_dir + 'human_sequences.fasta')
human_train_idx, human_valid_idx, human_test_idx, human_train_labels, human_valid_labels, human_test_labels, human_GO_terms = dp.load_test_sets(
    opt.main_data_dir + 'human_annotations_temporal_holdout.mat')

# Create train, validation, and test sets from the full list of human proteins
human_train_sequences = [human_sequences[i] for i in human_train_idx]
human_valid_sequences = [human_sequences[i] for i in human_valid_idx]
human_test_sequences = [human_sequences[i] for i in human_test_idx]

# Truncate the two longest training sequences to 5,000 residues
human_train_sequences[6640] = human_train_sequences[6640][:5000]
human_train_sequences[6613] = human_train_sequences[6613][:5000]
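
# A more general alternative to hardcoding the two outlier indices
# (a sketch assuming 5,000 residues is the intended cap; defined here
# for illustration and left unused by the pipeline):
def truncate_sequences(sequences, max_len=5000):
    """Return a copy of sequences with every entry capped at max_len."""
    return [s[:max_len] for s in sequences]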

# Create lengths for sequence representation averaging in FastText
human_train_lengths = dp.sequence_lengths_with_kmers(human_train_sequences,
                                                     opt.k)
human_valid_lengths = dp.sequence_lengths_with_kmers(human_valid_sequences,
                                                     opt.k)
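
# dp.sequence_lengths_with_kmers presumably counts the overlapping k-mers
# per sequence, i.e. len(seq) - k + 1 (an assumption about dp's internals;
# illustrative sketch, unused below):
def kmer_count(seq, k):
    """Number of overlapping k-mers in seq (0 if seq is shorter than k)."""
    return max(len(seq) - k + 1, 0)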
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F


# ## Load Data and Create Train/Dev/Test Sets

# ### Human sequences


human_sequences, _ = dp.load_FASTA('../capstone_data/human_sequences.fasta')
human_train_idx, human_valid_idx, human_test_idx, human_train_labels, human_valid_labels, human_test_labels, human_GO_terms = dp.load_test_sets(
    '../capstone_data/human_annotations_temporal_holdout.mat')

# Create train, validation, and test sets from the full list of human proteins
human_train_sequences = [human_sequences[i] for i in human_train_idx]
human_valid_sequences = [human_sequences[i] for i in human_valid_idx]
human_test_sequences = [human_sequences[i] for i in human_test_idx]

# Convert corresponding labels for train, validation, and test sets
# from the full list of human proteins.
human_train_labels = torch.from_numpy(human_train_labels).type(
    torch.LongTensor)
human_valid_labels = torch.from_numpy(human_valid_labels).type(
    torch.LongTensor)
human_test_labels = torch.from_numpy(human_test_labels).type(torch.LongTensor)
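
# These label matrices are multi-hot (one row per protein, one column per
# GO term). If a BCE-style loss is used downstream, they would be viewed as
# floats at loss time, e.g.
#   F.binary_cross_entropy_with_logits(logits, labels.float())
# (an assumption; the training loop is outside this excerpt).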


output_file = 'lstm_gpu_output/human/baseline'

# Hyper-parameters
emb_dim = 150
hidden_dim = 750
L2_penalty = 0.0005
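
# The L2 penalty is presumably applied through the optimizer's weight_decay
# argument once a model is constructed (a sketch, not the original training
# code):
#   optimizer = torch.optim.Adam(model.parameters(), weight_decay=L2_penalty)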

# ## Load Data and Create Train/Dev/Test Sets

human_sequences, _ = dp.load_FASTA('data/human_sequences.fasta')
human_train_idx, human_valid_idx, human_test_idx, human_train_labels, human_valid_labels, human_test_labels, human_GO_terms = dp.load_test_sets(
    'data/human_annotations_temporal_holdout.mat')

# Create train, validation, and test sets from the full list of human proteins
human_train_sequences = [human_sequences[i] for i in human_train_idx]
human_valid_sequences = [human_sequences[i] for i in human_valid_idx]
human_test_sequences = [human_sequences[i] for i in human_test_idx]

# Convert corresponding labels for train, validation, and test sets
# from the full list of human proteins.
human_train_labels = torch.from_numpy(human_train_labels).type(
    torch.LongTensor)
human_valid_labels = torch.from_numpy(human_valid_labels).type(
    torch.LongTensor)
human_test_labels = torch.from_numpy(human_test_labels).type(torch.LongTensor)


output_file = 'lstm_gpu_output/yeast/baseline/'

# Set Hyper-parameters:
emb_dim = 250  # dimension for n-gram embedding
hidden_dim = 750
L2_penalty = 0.0005

# ## Load Data and Create Train/Dev/Test Sets

# Load yeast sequences and training data
yeast_sequences, yeast_protein_names = dp.load_FASTA(
    'data/yeast_sequences.fasta')
yeast_train_idx, yeast_valid_idx, yeast_test_idx, yeast_train_labels, yeast_valid_labels, yeast_test_labels, yeast_GO_terms = dp.load_test_sets(
    'data/yeast_MF_temporal_holdout.mat')

# Create train, validation, and test sets from the full list of yeast proteins
yeast_train_sequences = [yeast_sequences[i] for i in yeast_train_idx]
yeast_valid_sequences = [yeast_sequences[i] for i in yeast_valid_idx]
yeast_test_sequences = [yeast_sequences[i] for i in yeast_test_idx]

# Convert corresponding labels for train, validation, and test sets
# from the full list of yeast proteins.
yeast_train_labels = torch.from_numpy(yeast_train_labels).type(
    torch.LongTensor)
yeast_valid_labels = torch.from_numpy(yeast_valid_labels).type(
    torch.LongTensor)
yeast_test_labels = torch.from_numpy(yeast_test_labels).type(torch.LongTensor)

# Create lengths for sequence representation averaging in FastText
yeast_train_lengths = dp.sequence_lengths(yeast_train_sequences)
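
# dp.sequence_lengths is presumably just the raw length of each sequence,
# i.e. [len(s) for s in yeast_train_sequences] (an assumption about dp's
# internals, noted here for reference).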