def randomSteps(env, steps, dqn):
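    """Fill the DQN replay memory by acting with a uniformly random policy.

    Runs `steps` environment steps; each episode starts with a random number
    of no-op actions, after which random actions are taken and the resulting
    (state, action, reward, next_state, done) transitions are stored in
    `dqn`'s replay memory.
    """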
    t0 = time.time()
    env.reset()
    i = 0
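    # `Pipe(4)` is assumed to be a small fixed-size FIFO over the last 4
    # preprocessed frames, exposing `.push(frame)` and `.items`. A minimal
    # sketch of the assumed interface (hypothetical, for illustration):
    #
    #     class Pipe:
    #         def __init__(self, size):
    #             self.size = size
    #             self.items = []
    #         def push(self, item):
    #             self.items.append(item)
    #             if len(self.items) > self.size:
    #                 self.items.pop(0)   # drop the oldest frame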
    frame_stack = Pipe(4)
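    # DeepMind-style no-op starts: each episode begins with a random number
    # of NO_OP actions so episodes do not all start from the same state.
    # The lower bound of 4 guarantees the frame stack is full before the
    # first stacked state is built.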
    initial_no_op = np.random.randint(4, NO_OP_MAX)

    for _ in range(steps):
        if i < initial_no_op:
            # Perform a random number of NO_OP actions to randomize the start state.
            action = NO_OP_CODE
            state, reward, done, info = env.step(action)
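            # rgb2gray/downSample are assumed to turn the raw RGB observation
            # into an IMAGE_SIZE x IMAGE_SIZE grayscale frame.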
            greyObservation = rgb2gray(state)
            state = downSample(greyObservation)
            frame_stack.push(state)
            i += 1
        else:
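            # Stack the buffered frames along the channel axis to form the
            # network's state representation of shape
            # (IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS).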

            state = np.stack(frame_stack.items, axis=2).reshape((IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

            # Sample a uniformly random action and one-hot encode it for
            # dqn.storeExperience.
            action = np.random.randint(0, len(ACTIONS))
            actionH = np.zeros(len(ACTIONS))
            actionH[action] = 1
            next_state, reward, game_over, info = env.step(action)


            greyObservation = rgb2gray(next_state)
            next_state = downSample(greyObservation)

            frame_stack.push(next_state)

            next_state = np.stack(frame_stack.items, axis=2).reshape((IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
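            # The pushed frame evicts the oldest one, so `next_state`
            # overlaps `state` in all but the newest frame.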

            # Store the transition in the replay memory; frames are cast to
            # np.uint8 (assumed storage dtype) to keep memory usage low.
            dqn.storeExperience(
                state.astype(np.uint8),
                actionH,
                reward,
                next_state.astype(np.uint8),
                game_over)
            if game_over:
                # The episode ended: reset the environment and frame stack,
                # and draw a fresh number of initial no-ops for the next one.
                env.reset()
                i = 0
                initial_no_op = np.random.randint(4, NO_OP_MAX)
                frame_stack = Pipe(4)

    t1 = time.time()
    print("Fullfilling replay memory operation took:",t1-t0,)
    print('Size of replay memory %s bytes and has %s elements' % ((sys.getsizeof(dqn.replayMemory)),len(dqn.replayMemory)))
    print
    initial_no_op = np.random.randint(4, NO_OP_MAX)
    i = 0
    frame_stack = Pipe(4)
    score = 0
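    # tf.summary.FileWriter is the TensorFlow 1.x API for writing the graph
    # and logged summaries so TensorBoard can display training progress.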
    summary_writer = tf.summary.FileWriter(LOG_DIRECTORY + RUN_STRING, sess.graph)
    print('Started training')
    for step in range(STEPS):
        if i < initial_no_op:
            # Perform a random number of NO_OP actions to randomize the start state.
            action = NO_OP_CODE
            state, reward, done, info = env.step(action)
            greyObservation = rgb2gray(state)
            state = downSample(greyObservation)
            frame_stack.push(state)
            i += 1
        else:

            state = np.stack(frame_stack.items, axis=2).reshape((IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

            # Query the network once per step; selectAction is assumed to
            # return a one-hot action vector (epsilon-greedy), so argmax
            # recovers the action index to send to the environment.
            action = dqn.selectAction(state, step)
            actionN = np.argmax(action)

            next_state, reward, game_over, info = env.step(actionN)
            greyObservation = rgb2gray(next_state)
            next_state = downSample(greyObservation)

            frame_stack.push(next_state)

            next_state = np.stack(frame_stack.items, axis=2).reshape((IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))